From 96482703cd0c565dea623b07ba52cd2d4691c570 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 6 May 2025 23:26:24 +0000 Subject: [PATCH 01/57] Filling out template with PR 5434 --- proposals/p5434.md | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 proposals/p5434.md diff --git a/proposals/p5434.md b/proposals/p5434.md new file mode 100644 index 0000000000000..495ef23781c38 --- /dev/null +++ b/proposals/p5434.md @@ -0,0 +1,70 @@ +# `ref` + + + +[Pull request](https://github.com/carbon-language/carbon-lang/pull/5434) + + + +## Table of contents + +- [Abstract](#abstract) +- [Problem](#problem) +- [Background](#background) +- [Proposal](#proposal) +- [Details](#details) +- [Rationale](#rationale) +- [Alternatives considered](#alternatives-considered) + + + +## Abstract + +TODO: Describe, in a succinct paragraph, the gist of this document. This +paragraph should be reproduced verbatim in the PR summary. + +## Problem + +TODO: What problem are you trying to solve? How important is that problem? Who +is impacted by it? + +## Background + +TODO: Is there any background that readers should consider to fully understand +this problem and your approach to solving it? + +## Proposal + +TODO: Briefly and at a high level, how do you propose to solve the problem? Why +will that in fact solve it? + +## Details + +TODO: Fully explain the details of the proposed solution. + +## Rationale + +TODO: How does this proposal effectively advance Carbon's goals? Rather than +re-stating the full motivation, this should connect that motivation back to +Carbon's stated goals and principles. This may evolve during review. Use links +to appropriate sections of [`/docs/project/goals.md`](/docs/project/goals.md), +and/or to documents in [`/docs/project/principles`](/docs/project/principles). 
+For example: + +- [Community and culture](/docs/project/goals.md#community-and-culture) +- [Language tools and ecosystem](/docs/project/goals.md#language-tools-and-ecosystem) +- [Performance-critical software](/docs/project/goals.md#performance-critical-software) +- [Software and language evolution](/docs/project/goals.md#software-and-language-evolution) +- [Code that is easy to read, understand, and write](/docs/project/goals.md#code-that-is-easy-to-read-understand-and-write) +- [Practical safety and testing mechanisms](/docs/project/goals.md#practical-safety-and-testing-mechanisms) +- [Fast and scalable development](/docs/project/goals.md#fast-and-scalable-development) +- [Modern OS platforms, hardware architectures, and environments](/docs/project/goals.md#modern-os-platforms-hardware-architectures-and-environments) +- [Interoperability with and migration from existing C++ code](/docs/project/goals.md#interoperability-with-and-migration-from-existing-c-code) + +## Alternatives considered + +TODO: What alternative solutions have you considered? From a0b90c7a867651907776496eff1e07ae8571549f Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 6 May 2025 23:29:47 +0000 Subject: [PATCH 02/57] Checkpoint progress. --- proposals/p5434.md | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/proposals/p5434.md b/proposals/p5434.md index 495ef23781c38..08e1213e82dcd 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -46,6 +46,74 @@ will that in fact solve it? TODO: Fully explain the details of the proposed solution. 
+```carbon +// `bound` works like [[clang::lifetimebound]] +// https://clang.llvm.org/docs/AttributeReference.html#id8 + +fn F(bound x: i32) -> ref i32 { + var y: i32 = x; + // reject + return y; +} + +fn G(bound p: i32*) -> ref i32 { + // accept + return *p; +} + +fn H(bound var x: i32) -> ref i32 { + // reject + return x; +} + +class C { + // accept + fn CF[bound ref self: Self]() -> ref i32 { return self.m; } + + // reject + fn CG[bound self: Self]() -> ref i32 { return self.m; } + + // reject + fn CH[bound var self: Self]() -> ref i32 { return self.m; } + + var m: i32; +} + +// Would be marked "pointer" if we decide to support that +class D { + // accept + fn DF[bound ref self: Self]() -> ref i32 { return *self.pm; } + + // accept + fn DG[bound self: Self]() -> ref i32 { return *self.pm; } + + // accept + fn DH[bound var self: Self]() -> ref i32 { return *self.pm; } + + var pm: i32*; +} + +class E { + fn Make() -> E { + return var ret: E; + ret.m = 0; + ret.pm = &ret.m; + return var; + } + // accept + fn EF[bound ref self: Self]() -> ref i32 { return *self.pm; } + + // could reject if we had an annotation saying E is not pointer + fn EG[bound self: Self]() -> ref i32 { return *self.pm; } + + // could reject if we had an annotation saying E is not pointer + fn EH[bound var self: Self]() -> ref i32 { return *self.pm; } + + var pm: i32*; + var m: i32; +} +``` + ## Rationale TODO: How does this proposal effectively advance Carbon's goals? Rather than From 7cc4b32cae30cf12db62612fc78d2a5ee685884d Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 12 May 2025 03:25:09 +0000 Subject: [PATCH 03/57] Checkpoint progress. 
--- proposals/p5434.md | 186 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 176 insertions(+), 10 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 08e1213e82dcd..51b499135a8d5 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -17,6 +17,17 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Background](#background) - [Proposal](#proposal) - [Details](#details) + - [Use case: `Deref` interface](#use-case-deref-interface) + - [Use case: indexing interfaces](#use-case-indexing-interfaces) + - [Use case: class accessors](#use-case-class-accessors) + - [Use case: `OptionalRef`](#use-case-optionalref) +- [Future work](#future-work) + - [`ref` bindings as fields](#ref-bindings-as-fields) + - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) + - [Interaction with effects](#interaction-with-effects) + - [More precise lifetimes](#more-precise-lifetimes) + - [Combining with compile-time bindings](#combining-with-compile-time-bindings) + - [Tuple parameters with `ref` elements](#tuple-parameters-with-ref-elements) - [Rationale](#rationale) - [Alternatives considered](#alternatives-considered) @@ -24,27 +35,140 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ## Abstract -TODO: Describe, in a succinct paragraph, the gist of this document. This -paragraph should be reproduced verbatim in the PR summary. +- A parameter binding can be marked `ref` instead of `var` or the default. It + will bind to reference argument expressions in the caller and produces a + reference expression in the callee. + - Unlike pointers, a `ref` binding may not be rebound to a different + object. + - This replaces `addr`, and is not restricted to the `self` parameter. + - A `ref` binding may not be used in fields of classes or structs. +- The return of a function can optionally be marked `ref`, `let`, or `var`. + These control the category of the call expression invoking the function, and + how the return expression is returned. 
+ - This applies to individual components for functions returning parens or + brace forms. +- Parameters that the return may reference the storage of must be marked + `bound`. +- The address of a `ref` binding is `nocapture` and `noalias`. A `ref` + parameter of a function may be referenced by the return value if the `ref` + binding is also marked `bound`. ## Problem -TODO: What problem are you trying to solve? How important is that problem? Who +FIXME: What problem are you trying to solve? How important is that problem? Who is impacted by it? +Reference bindings have come up multiple times: + +- Better alternative to `addr self: Self*` +- Lambda captures +- [Nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250) +- Forwarding of arguments preserving expression category. + +They also closely match the expression category. + ## Background -TODO: Is there any background that readers should consider to fully understand -this problem and your approach to solving it? +FIXME: reference expressions + +FIXME: `addr self` + +[leads issue #5261: We should add `ref` bindings to to Carbon, paralleling reference expressions](https://github.com/carbon-language/carbon-lang/issues/5261) + +FIXME: +[LLVM `noalias` attribute](https://llvm.org/docs/LangRef.html#function-attributes), +[LLVM pointer aliasing rules](https://llvm.org/docs/LangRef.html#pointer-aliasing-rules) + +FIXME: +[LLVM `nocapture` attribute](https://releases.llvm.org/11.0.0/docs/LangRef.html#parameter-attributes) +has become +[`captures(none)` and `captures(ret: address, provenance)`](https://llvm.org/docs/LangRef.html#function-attributes) +with [pointer capture rules](https://llvm.org/docs/LangRef.html#pointer-capture) + +FIXME: +[`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) ## Proposal -TODO: Briefly and at a high level, how do you propose to solve the problem? Why -will that in fact solve it? 
+The keyword `ref` marks a `:` binding as binding to a reference expression, as +in: + +```carbon +fn F(ptr: i32*) { + // A reference binding `x`. + let ref x: i32 = *ptr; + + // Use of `x` is a reference expression that + // refers to the same object as `*ptr`. + Assert(&x == ptr); + + // Equivalent to `*ptr += 1;`. + x += 1; +} + +fn G() { + var y: i32 = 2; + F(&y); + Assert(y == 3); +} +``` + +The use of the name (`x` in the example) of a `ref` binding forms a reference +expression. The address of a `ref` bound name gives the address of the bound +object, so `&x == ptr` above. The reference itself does not have an address, and +unlike a pointer can't be rebound to reference a different object. + +We remove `addr`, and instead use `ref` for the `self` parameter when an +object is required. Note that the type will change from `Self*` to `Self` in +this case. + +```carbon +class C { + // ❌ No longer valid. + fn OldMethod[addr self: Self*]() { + // Previously would dereference `self` in + // the body of the method. + self->x += 3; + } + + // ✅ Now valid. + fn NewMethod[ref self: Self]() { + // Now `self` is a reference expression, + // and is not dereferenced. + self.x += 3; + } + + // ✅ Other uses are unchanged. + fn Get[self: Self]() -> i32 { + return self.x; + } + + var x: i32; +} +``` + +The `ref` modifier is allowed on any `:` parameter or `let` binding. It is +forbidden on `class` or `struct` fields. + +As an _experiment_, we are saying a pointer formed by taking the address of a +`ref` bound name is LLVM-`nocapture` and LLVM-`noalias`. FIXME: finish + +FIXME: The return of a function can optionally be marked `ref`, `let`, or `var`. +These control the category of the call expression invoking the function, and how +the return expression is returned. + +- This applies to individual components for functions returning parens or + brace forms. FIXME: need to support returning paren patterns that have some + elements that are `ref` for things like `enumerate`. 
+ +FIXME: Parameters that the return may reference the storage of must be marked +`bound`. A `ref` parameter of a function may be referenced by the return value +if the `ref` binding is also marked `bound`. Same semantics as +[`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) ## Details -TODO: Fully explain the details of the proposed solution. +FIXME: Fully explain the details of the proposed solution. ```carbon // `bound` works like [[clang::lifetimebound]] @@ -114,9 +238,51 @@ class E { } ``` +### Use case: `Deref` interface + +FIXME + +### Use case: indexing interfaces + +FIXME + +### Use case: class accessors + +FIXME + +### Use case: `OptionalRef` + +FIXME + +## Future work + +### `ref` bindings as fields + +FIXME + +### `ref` bindings in lambdas + +FIXME + +### Interaction with effects + +FIXME + +### More precise lifetimes + +FIXME + +### Combining with compile-time bindings + +FIXME + +### Tuple parameters with `ref` elements + +FIXME + ## Rationale -TODO: How does this proposal effectively advance Carbon's goals? Rather than +FIXME: How does this proposal effectively advance Carbon's goals? Rather than re-stating the full motivation, this should connect that motivation back to Carbon's stated goals and principles. This may evolve during review. Use links to appropriate sections of [`/docs/project/goals.md`](/docs/project/goals.md), @@ -135,4 +301,4 @@ For example: ## Alternatives considered -TODO: What alternative solutions have you considered? +FIXME: What alternative solutions have you considered? From bf4930fe5af3a2919a4766d3b4123427a2524b27 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 16 May 2025 04:33:14 +0000 Subject: [PATCH 04/57] Checkpoint progress. 
--- proposals/p5434.md | 530 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 520 insertions(+), 10 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 51b499135a8d5..15a8e94f35785 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -17,19 +17,38 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Background](#background) - [Proposal](#proposal) - [Details](#details) + - [Return conventions](#return-conventions) + - [Composite return forms and patterns](#composite-return-forms-and-patterns) + - [`bound` parameters](#bound-parameters) + - [Details of lifetimes](#details-of-lifetimes) + - [How addresses interact with `ref`](#how-addresses-interact-with-ref) + - [Improved C++ interop and migration](#improved-c-interop-and-migration) + - [Details of impact on the type system](#details-of-impact-on-the-type-system) + - [Interaction with `returned var`](#interaction-with-returned-var) - [Use case: `Deref` interface](#use-case-deref-interface) - [Use case: indexing interfaces](#use-case-indexing-interfaces) - [Use case: class accessors](#use-case-class-accessors) - [Use case: `OptionalRef`](#use-case-optionalref) - [Future work](#future-work) - - [`ref` bindings as fields](#ref-bindings-as-fields) - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) - [Interaction with effects](#interaction-with-effects) - [More precise lifetimes](#more-precise-lifetimes) - [Combining with compile-time bindings](#combining-with-compile-time-bindings) - [Tuple parameters with `ref` elements](#tuple-parameters-with-ref-elements) + - [Interaction with `Call` or other interfaces](#interaction-with-call-or-other-interfaces) - [Rationale](#rationale) - [Alternatives considered](#alternatives-considered) + - [No `ref`, only pointers](#no-ref-only-pointers) + - [Remove pointers after adding references](#remove-pointers-after-adding-references) + - [All `ref` bindings in the fields of classes](#all-ref-bindings-in-the-fields-of-classes) + - [No 
call-site annotation](#no-call-site-annotation) + - [Top-level `ref` introducer](#top-level-ref-introducer) + - [`ref` as a type qualifier](#ref-as-a-type-qualifier) + - [Temporary lifetime extension](#temporary-lifetime-extension) + - [`bound` would change the default return to `let`](#bound-would-change-the-default-return-to-let) + - [Other return conventions](#other-return-conventions) + - [`return var` with composite return forms](#return-var-with-composite-return-forms) + - [Other syntax for compound return forms](#other-syntax-for-compound-return-forms) @@ -42,13 +61,15 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception object. - This replaces `addr`, and is not restricted to the `self` parameter. - A `ref` binding may not be used in fields of classes or structs. + - When calling functions, arguments to non-`self` `ref` parameters are + also marked with `ref`. - The return of a function can optionally be marked `ref`, `let`, or `var`. These control the category of the call expression invoking the function, and how the return expression is returned. - This applies to individual components for functions returning parens or brace forms. -- Parameters that the return may reference the storage of must be marked - `bound`. +- Any parameters whose lifetime needs to contain the lifetime of the return + must be marked `bound`. - The address of a `ref` binding is `nocapture` and `noalias`. A `ref` parameter of a function may be referenced by the return value if the `ref` binding is also marked `bound`. @@ -147,9 +168,14 @@ class C { } ``` +Potentially abbreviating the syntax further is left as future work. + The `ref` modifier is allowed on any `:` parameter or `let` binding. It is forbidden on `class` or `struct` fields. +FIXME: When calling functions, arguments to non-`self` `ref` parameters are also +marked with `ref`. + As an _experiment_, we are saying a pointer formed by taking the address of a `ref` bound name is LLVM-`nocapture` and LLVM-`noalias`. 
FIXME: finish @@ -166,9 +192,69 @@ FIXME: Parameters that the return may reference the storage of must be marked if the `ref` binding is also marked `bound`. Same semantics as [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) +FIXME: Exact structure of how `ref` is attached within patterns w.r.t. +destructuring, etc. + ## Details -FIXME: Fully explain the details of the proposed solution. +FIXME: Expectation that we would have a goal to use references instead of +pointers when possible. + +### Return conventions + +- `ref`: callee returns address + - call is reference expression +- `var`: caller passes in address to initialize, callee constructs object at + that address + - call is initializing expression + - guarantees that `returned var` is the same variable in the caller and + callee +- `let`: callee returns by value, using the value representation which could + be copy, reference, or custom + - call is a value expression +- default: + - copy in registers if the initializing representation is a copy + - `var` convention otherwise + - produces an initializing expression + +### Composite return forms and patterns + +Mirroring the paren and brace pattern forms, we also support paren and brace +return forms. FIXME: link to pattern docs. + +```carbon +// Paren return form +fn ParenReturn(...) + -> (-> bool, ->let f32, ->var C, ->ref i32); + +let (var a: bool, b: f32, var c: C, ref d: D) + = ParenReturn(...); + +fn BraceReturn(...) + -> {-> .a: bool, + ->let .b: f32, + ->var .c: C, + ->ref .d: i32}; + +// Binds to the names `x`, `y`, `z`, `w`: +let {.a = var x: bool, + .b = let y: f32, + .c = var z: C, + .d = ref w: i32} = BraceReturn(...); + +// Binds to the names `a`, `b`, `c`, `d`: +let {var a: bool, + let b: f32, + var c: C, + ref d: i32} = BraceReturn(...); +``` + +FIXME: We add support for `var`, `let`, and `ref` to patterns in these positions +as well. 
+ +### `bound` parameters + +FIXME ```carbon // `bound` works like [[clang::lifetimebound]] @@ -238,6 +324,198 @@ class E { } ``` +FIXME: If a temporary argument is bound to a return that outlives it, it is an +error. + +FIXME: Can write `bound` the parameters of a function member of an interface. +The `impl` of that interface for a type can only have `bound` in a subset of the +positions present in the interface. + +FIXME: Like C++, `bound` does not affect semantics or calling conventions, just +what code is legal. Helps with the discrepancy between interface and impl +functions. + +FIXME: Can a `var`/`ref`/`let` return be bound to a `var`/`ref`/`let` parameter? +Yes in all 9 combinations. Examples: + +``` +fn RefToLet(bound ref x: C) -> let D { return x.d; } +fn LetToRef(bound y: C) -> ref D { return *y.ptr; } +fn VarToRef(bound var p: i32*) -> ref i32 { return *p; } +fn VarToVar(bound var p: i32*) -> var i32* { return p; } +``` + +For full safety, need a bound variable to be immutably borrowed for the duration +of the lifetime of the returned result. However, goal for now is just matching +`[[clang::lifetimebound]]`, which just has the goal of preventing some classes +of bugs, not full memory safety. We will reconsider this with the memory safety +design. + +### Details of lifetimes + +We should ensure that reference expressions formed by way of reference bindings +_do not dangle_. + +So for any reference expression that has a known lifetime already in the +language, such as those associated with temporaries or `var` declarations, we +should either lifetime-extend (in the case of temporaries) or error (in the case +of declarations) when trying to form a binding that would outlive the referenced +object. 
+For reference expressions without known lifetimes currently such as dereferenced +pointers, while we should allow them despite unsafety today, we should fully +expect lifetime safety in Carbon to eventually introduce a way of reasoning +about these lifetimes and with that a requirement that the lifetime of the +binding be satisfied. That should be explicitly expected as future work and part +of getting an overall safety story for Carbon. + +This does fundamentally mean that we now have another kind of "pointer", +potentially adding complexity to any memory-safety story. However, I think this +ship already sailed to some extent with value bindings. Fundamentally, bindings +are allowed to have pointer-like semantics from a lifetime perspective, and so +will need to be considered as a pointer-like thing as we build out lifetime +safety. + +### How addresses interact with `ref` + +The suggested model is that `ref` bindings mirror reference expressions in that +they refer back to some underlying object. As a consequence, it should be +possible to take the address of a `ref` binding and get the address of that +object. + +However, we expect reference expressions and as a consequence `ref` bindings to +work more like Swift `inout` than like a pointer: there may be implicit copies +or moves that occur prior to forming the reference expression, or binding it to +a name. The goal is that it should be possible for some types to implement `ref` +parameters through move-in / move-out semantics. + +When we have a `ref` _binding_ specifically, we expect its address to be +_stable_ for the lifetime of the binding. And there is no valid move-in/move-out +semantic model for _overlapping_ bindings -- those must all reference the same +underlying object, and the address of those must all match in addition to being +stable. 
But for non-overlapping bindings such as parameters, a move-in/move-out +model should be equally valid from the perspective of the `ref` binding, and the +address within the function might be different from the address in the caller. + +At least in cases where a type permits move-in/move-out, the address of a `ref` +parameter should be implicitly `nocapture` in LLVM's semantic model for example. +Whether we go further and restrict `ref` to be LLVM-`nocapture` more broadly is +an open question that can likely also be an area for future work. + +FIXME: also `noalias` see +https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2843057728 + +FIXME: + +- `noalias` means like C restrict; you can't observe mutations through + aliases; mutation through a restricted pointer is not observable through + another pointer +- `captures(none)` means no transitive escape, can pass a nocapture pointer to + another nocapture function, can't store to memory or return +- `captures(ret: address, provenance)`: is like `captures(none)` but may be + referenced by a return. + +Will use `bound` marker (FIXME: link bound section) to pick between these last +two options. + +The `noalias` semantics are the minimum for the optimization. But this condition +is hard to check, so safe code will use a stricter criteria. Unsafe code will be +required to adhere to the looser `noalias` restrictions, but will not be checked +(except possibly by a sanitizer at runtime). The details here will be tackled as +part of the memory safety design. + +Optimizations will only be performed based on information that is enforced or +checked by the compiler, so these attributes won't be passed to LLVM unless +their requirements can be established. This avoids introducing undefined +behavior, which we particularly don't want to do in situations where C++ +doesn't. 
+Outcome going for: nudges us towards function boundaries that don't kind of +constructively create aliasing in their API boundary and don't capture pointers +unnecessarily. I think we can actually enforce the nocapture side of this and we +should reject constructive aliasing between arguments. And if that works, we get +the benefit that this will cause programmers to use patterns that don't +introduce aliasing that inhibits optimizations. + +Two ways in which optimizer runs into aliasing problems: data structures on the +heap where we want to disable optimizations that rely on assuming pointers don't +alias, which is largely unavoidable. Second way, is at a function boundary, but +this can often be avoided with a different function API. The goal is to +encourage function APIs that don't create aliases, or it is checkable at +call sites when it is happening. + +Important property: reference bindings nest lexically. Don't have a reference +binding whose lifetime partially overlaps. Means the inner reference binding +would have to be based on the outer one anyway, which I think makes it okay. If +we have a `let` binding to the same object, we need to prevent modifications to +that object, whether that is through a `ref` binding or any other way. This +suggests not allowing a `ref` binding to another function while a `let` binding +is active, and not allowing the same object to be passed as both a `ref` and +`let` to the same function. + +These restrictions are experimental, and we should keep track of everything we +end up needing to do to work around these restrictions so any reconsideration +can be properly informed. + +### Improved C++ interop and migration + +We expect this to improve interop and migration by allowing significantly more +interface similarity between Carbon and C++. Previously, many things in C++ that +used references on interface boundaries would be forced to switch to pointers. 
+This adds ergonomic friction both at a basic level because of the forced change +but also a deeper level because it will make it significantly harder to see the +parallel usage across the boundary between C++ and Carbon. With reference +bindings, the vast majority of this dissonance will be removed. + +FIXME: Concern from +[open discussion on 2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) +is that the `nocapture` and `noalias` modifiers don't match C++ restrictions, +particularly on the `this` parameter that we are going to require migrate to +`ref self`. + +### Details of impact on the type system + +These will ultimately be part of the type system, but the goal is for them to +only be part of the type system through patterns used in the type system: +function parameters, etc. + +Specifically, we don't expect them to be part of the _object types_ in Carbon, +but only part of the expression categories and bindings within patterns. In this +regard, they are very similar to _value_ bindings -- we retain a great deal of +implementation flexibility around layout, etc. + +This specifically means we will need to incorporate `ref` bindings into the +`Call` interface and we will be adding complexity there that will need to be +handled by overloading. The overloading impact specifically is likely future +work, but will at least carry additional complexity to handle `ref`. + +### Interaction with `returned var` + +The rule is: `returned var` may only be used when there is a single component to +the return form, and it is either `->var` or default `->`. + +```carbon +fn F(...) -> var V { + returned var v: V = ...; + // ... + return var; +} + +fn F(...) -> T { + returned var ret: T = ...; + // ... + return var; +} + +fn F(...) -> {->var .a : T} { + returned var ret: T = ...; + // ... + return var; +} +``` + +Can revisit later if motivated. 
+ ### Use case: `Deref` interface FIXME @@ -256,17 +534,14 @@ FIXME ## Future work -### `ref` bindings as fields - -FIXME - ### `ref` bindings in lambdas -FIXME +FIXME: might be a reason to support `ref` bindings as fields, with all the +restrictions that comes with that. ### Interaction with effects -FIXME +FIXME: `Optional`, errors/`Result`, co-routines, async ### More precise lifetimes @@ -280,6 +555,14 @@ FIXME FIXME +### Interaction with `Call` or other interfaces + +For now, `ref` is not represented in the `Call` interface introduced in +[proposal #2875: Functions, function types, and function calls](https://github.com/carbon-language/carbon-lang/pull/2875). +This will be tackled together in a future proposal with other aspects of +bindings not represented by the type, such as `var` and compile-time, along with +being generic across these aspects of bindings. + ## Rationale FIXME: How does this proposal effectively advance Carbon's goals? Rather than @@ -302,3 +585,230 @@ For example: ## Alternatives considered FIXME: What alternative solutions have you considered? 
+Discussed in open discussion on: + +- [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) +- [2025-05-06](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.s42g5iv67d3c) +- 2025-05-07 + [a](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.sfx9d7ltud5) + [b](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.4zbo49wg5rmk) +- [2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) +- [2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu) +- [2025-05-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.bdznj2d0by2g) +- [2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343) + +### No `ref`, only pointers + +- [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) + +Reasons to add references: + +There is a tension between wanting to have mutating expressions and only having +pointers. Think you need some concept like a reference in order to mutate an +object with an expression. Question is how small a box it is +restricted to, and where the line is drawn. C has lvalues, which contain +references but are restricted to a quite small box. Reference bindings +specifically about keeping a small box around references while still enough +expressivity. Started with a C-ish model, but it fell down when it comes down to +composition. Decomposing an expression into pieces loses the tools the +expression provided to you. Missing piece was reference bindings. 
+Saw how much we were leaning on value bindings. Also the asymmetry between +having value bindings but not reference bindings when we have value expressions +and reference expressions was creating pressure. Example: accessing members of +an object, had to escape to pointers in that operator. + +Expectation that we would have a goal to use references instead of pointers when +possible. + +- One of the features of pattern matching we want is being able to modify + things after they have been matched. Will be a reference. +- Refactoring code without changing all the uses of a name. Already seeing + this problem with `self` and `addr self`, and would be a point of friction + in local pattern matching in the future. +- Expected to help increase the expressiveness of lambda captures. + +Counter: good to see indirections when they are present + +### Remove pointers after adding references + +Need to have something rebindable in an assignable object. The viable path +forward without separate pointers and references is to have something rebindable +like pointers but automatically dereferenced like references, which is the +approach Rust takes. + +One of the features of a reference is what it cannot do, so can't build a +pointer out of references. + +### All `ref` bindings in the fields of classes + +FIXME + +Did not see a reason to put references into objects, so keeping references out +of types. + +### No call-site annotation + +FIXME +[2025-05-07](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.sfx9d7ltud5) + +The marking is not about lifetime, but ability to mutate. A `let` may reference +an object in a similar way to a `ref`, restricting operations on the original +object, but we are not going to mark `let`s since those restrictions are +enforced by the compiler. 
+ +Swift `inout` parameters are marked at caller with an `&` before the argument +https://docs.swift.org/swift-book/documentation/the-swift-programming-language/functions/#In-Out-Parameters + +On the other hand, not marking is not known to be a source of bugs. + +This is a "try it and see how well it works" sort of decision. + +### Top-level `ref` introducer + +For now, we don't believe `let ref` to be so common as to need a shorter way to +write it, unlike what we do for `var`. + +### `ref` as a type qualifier + +FIXME: big concern is effects, but also representing argument lists with tuples + +### Temporary lifetime extension + +FIXME: No. Neither Rust nor C++ does this in these cases. + +### `bound` would change the default return to `let` + +[2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) +and +[2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) + +FIXME: Before we realized that `bound` is relevant for all return conventions. +Changing defaults is action at a distance, creating unexpected changes for +readers of the code. We don't want the return category of a function in an +interface using `bound` to differ from that of the same function in the `impl` of that `interface` +when the `impl` does not use `bound`. Lifetimes in Rust and Clang's `lifetimebound` don't change calling +conventions, only what code is valid. + +### Other return conventions + +[2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) +and +[2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu) + +We also considered other conventions for returning from functions, most notably: + +- **var without storage**: callee returns pointer to the storage of a + subobject of a `bound var` parameter, that caller is then responsible for + destroying.
A call to this function is a reference expression, but with + additional responsibility to destroy. +- **hybrid**: if copy value representation or trivial destructive move then + return the object representation directly; otherwise caller passes a pointer + and callee initializes it. + +There were also some variations on what the conditions would be for returning in +registers using the default return convention. + +We considered "var without storage" the longest, but the fact that it couldn't +reliably be used to initialize a variable meant it did not seem valuable enough to +include. It seemed more valuable to support the current `-> var`. That return +form allows you to guarantee knowing the address of the object being +constructed, and was a good match for `returned var`. + +### `return var` with composite return forms + +We considered various syntax options on +[2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu), +but none of them seemed good enough to justify inclusion at this time: + +```carbon +fn F(...) -> (ref R, let L, var V) { + // No longer a `var` being returned. Ideally these + // shouldn't have to be initialized together. + returned ??? (ref r: R, let l: L, var v: V) = ...? + return var; +} + +// We could restrict to one `var` return component, +// but this is a lot of machinery for a small increase +// in expressiveness and applicability. +fn F(...) -> (ref R, let L, var V) { + returned var v: V = ...; + let l: L = ...; + return (*r, l, var); +} + +fn F(...) -> (ref R, let L, var V) { + // These don't have the right category, and ideally + // shouldn't have to be initialized together. + returned var ret: (R, L, V) = ...? + return var; +} + +fn F(...) -> (ref R, let L, var V) { + returned var (_, _, var v: V) = ; +} +``` + +There was another approach we considered for `returned var` originally: + +```carbon +fn F(...) -> (ref R, let L, var v1: V1, var v2: V2) { + // ...
+ // Must use the same names for `var` parameters + return (r, l, v1, v2); +} +``` + +But this had downsides that still apply: + +- Requires `V1` and `V2` to have unformed states. Otherwise, `v1` and `v2` + would need to be initialized when they are declared. +- This does not support only having some branches use `return var`. + +Our current approach handles our main use case for `returned var`: factory +functions. + +We also considered only allowing `returned var` with `-> var` returns, not +default returns. This would avoid bugs from assuming the address of the +`returned var` matches the `var` in the caller. It is more important, though, to +support `returned var` in generic use cases, since it avoids unnecessary +requirements on the type. + +We could support an "only `var`s" approach in the future if we want: + +``` +fn F(...) -> (var V1, var V2, var V3) { + // Would have to decide which of these would be allowed: + returned var v: (V1, V2, V3) = ...; + returned var (v1: V1, v2: V2, v3: V3) = ...; + + // ... + return var; +} +``` + +### Other syntax for compound return forms + +[2025-05-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.bdznj2d0by2g), +[#syntax in Discord on 2025-05-14](https://discord.com/channels/655572317891461132/709488742942900284/1372285365162872943), +and +[2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343) + +Omitting the `->` in each component did not distinguish tuples from paren return +forms sufficiently: + +```carbon +-> (ref i32, var i32) +-> (bool, ref i32) + +// Is this a single return of a tuple, or a triple +// return using the default return convention?
+-> (i32, i32, i32) +``` + +We also considered an approach where compound return forms would start with +`->?`, but this raised concerns about what the meaning of that syntax would be +and whether we want to expose users to that in cases we might be able to avoid +it. From cc0a1f34fc12858f2f8274062597971c92538c21 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 16 May 2025 05:14:46 +0000 Subject: [PATCH 05/57] Checkpoint progress. --- proposals/p5434.md | 55 ++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 15a8e94f35785..73ae329ae485e 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -23,7 +23,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Details of lifetimes](#details-of-lifetimes) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) - - [Details of impact on the type system](#details-of-impact-on-the-type-system) + - [Part of the expression type system, not object types](#part-of-the-expression-type-system-not-object-types) - [Interaction with `returned var`](#interaction-with-returned-var) - [Use case: `Deref` interface](#use-case-deref-interface) - [Use case: indexing interfaces](#use-case-indexing-interfaces) @@ -60,37 +60,45 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - Unlike pointers, a `ref` binding may not be rebound to a different object. - This replaces `addr`, and is not restricted to the `self` parameter. - - A `ref` binding may not be used in fields of classes or structs. + - A `ref` binding, like a `let` binding, may not be used in fields of + classes or structs. - When calling functions, arguments to non-`self` `ref` parameters are also marked with `ref`. - The return of a function can optionally be marked `ref`, `let`, or `var`. 
These control the category of the call expression invoking the function, and how the return expression is returned. - - This applies to individual components for functions returning parens or - brace forms. + - These may be mixed for functions returning parens or brace forms. - Any parameters whose lifetime needs to contain the lifetime of the return must be marked `bound`. -- The address of a `ref` binding is `nocapture` and `noalias`. A `ref` - parameter of a function may be referenced by the return value if the `ref` - binding is also marked `bound`. +- The address of a `ref` binding is `nocapture` and `noalias`. +- We mark parameters of a function that may be referenced by the return value + with `bound`. ## Problem -FIXME: What problem are you trying to solve? How important is that problem? Who -is impacted by it? - Reference bindings have come up multiple times: -- Better alternative to `addr self: Self*` -- Lambda captures -- [Nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250) -- Forwarding of arguments preserving expression category. +- as a better alternative to `addr self: Self*`, +- for use in [lambda captures](/docs/design/lambdas.md), +- to support + [nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250), +- for forwarding arguments while preserving + [expression category](/docs/design/README.md#expression-categories), and +- to support breaking up an expression into pieces without altering the + expression category of individual pieces. + +Reference returns have also come up before, particularly to support operators +such as indexing `[`...`]` and other functions that should produce a reference +expression. It is desirable, though, that this not introduce new memory unsafety +concerns, due to returning a reference to something with insufficient lifetime. -They also closely match the expression category. 
+In addition, we have been interested in adding other return mechanisms that +support returning values in registers in cases that our current convention +won't. ## Background -FIXME: reference expressions +FIXME: reference expressions in Carbon FIXME: `addr self` @@ -109,6 +117,8 @@ with [pointer capture rules](https://llvm.org/docs/LangRef.html#pointer-capture) FIXME: [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) +FIXME: references in C++ + ## Proposal The keyword `ref` marks a `:` binding as binding to a reference expression, as @@ -338,7 +348,7 @@ functions. FIXME: Can a `var`/`ref`/`let` return be bound to a `var`/`ref`/`let` parameter? Yes in all 9 combinations. Examples: -``` +```carbon fn RefToLet(bound ref x: C) -> let D { return x.d; } fn LetToRef(bound y: C) -> ref D { return *y.ptr; } fn VarToRef(bound var p: i32*) -> ref i32 { return *p; } @@ -473,7 +483,9 @@ is that the `nocapture` and `noalias` modifiers don't match C++ restrictions, particularly on the `this` parameter that we are going to require migrate to `ref self`. -### Details of impact on the type system +### Part of the expression type system, not object types + +Much like value bindings, a `ref` binding is not These will ultimately be part of the type system, but the goal is for them to only be part of the type system through patterns used in the type system: @@ -486,8 +498,9 @@ implementation flexibility around layout, etc. This specifically means we will need to incorporate `ref` bindings into the `Call` interface and we will be adding complexity there that will need to be -handled by overloading. The overloading impact specifically is likely future -work, but will at least carry additional complexity to handle `ref`. +handled by overloading. The changes to the `Call` interface are future work, and +overloading, once we add support, will need to carry additional complexity to +handle `ref`.
### Interaction with `returned var` @@ -778,7 +791,7 @@ requirements on the type. We could support an "only `var`s" approach in the future if we want: -``` +```carbon fn F(...) -> (var V1, var V2, var V3) { // Would have to decide which of these would be allowed: returned var v: (V1, V2, V3) = ...; From eb2ab64617a8763baa12dcf0fca1904f3026cee9 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 16 May 2025 15:12:11 +0000 Subject: [PATCH 06/57] Checkpoint progress. --- proposals/p5434.md | 113 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 93 insertions(+), 20 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 73ae329ae485e..93d0690e6c378 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -18,7 +18,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Proposal](#proposal) - [Details](#details) - [Return conventions](#return-conventions) - - [Composite return forms and patterns](#composite-return-forms-and-patterns) + - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [`bound` parameters](#bound-parameters) - [Details of lifetimes](#details-of-lifetimes) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) @@ -47,7 +47,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Temporary lifetime extension](#temporary-lifetime-extension) - [`bound` would change the default return to `let`](#bound-would-change-the-default-return-to-let) - [Other return conventions](#other-return-conventions) - - [`return var` with composite return forms](#return-var-with-composite-return-forms) + - [`return var` with compound return forms](#return-var-with-compound-return-forms) - [Other syntax for compound return forms](#other-syntax-for-compound-return-forms) @@ -205,6 +205,8 @@ if the `ref` binding is also marked `bound`. Same semantics as FIXME: Exact structure of how `ref` is attached within patterns w.r.t. destructuring, etc. +FIXME: `ref` parameters pass an address/pointer. 
+ ## Details FIXME: Expectation that we would have a goal to use references instead of @@ -222,22 +224,52 @@ pointers when possible. - `let`: callee returns by value, using the value representation which could be copy, reference, or custom - call is a value expression + - implementation is allowed to return in registers, if the value + representation fits, or spill to memory - default: - copy in registers if the initializing representation is a copy - `var` convention otherwise - - produces an initializing expression + - call an initializing expression + +FIXME: Default and `var` returns both produce initializing expressions, but are +not the same. + +```carbon +fn F() -> var T { + returned var v: T = {.p = &v}; + return var; +} + +// Changing this to fn G() -> var T has a semantic effect +// if T has a copy initializing representation (and this +// passes safety checks). +fn G() -> T { + return F(); +} + +fn H() -> var T { + return G(); +} + +var x: T = H(); +// Is x.p == &x at this point? +``` -### Composite return forms and patterns +### Compound return forms and patterns Mirroring the paren and brace pattern forms, we also support paren and brace -return forms. FIXME: link to pattern docs. +return forms. FIXME: link to pattern docs. Every element of these forms starts +with `->` and the kind of return (if not default). + +FIXME: We add support for `var`, `let`, and `ref` to patterns in these positions +as well. ```carbon // Paren return form fn ParenReturn(...) -> (-> bool, ->let f32, ->var C, ->ref i32); -let (var a: bool, b: f32, var c: C, ref d: D) +let (var a: bool, b: f32, var c: C, ref d: i32) = ParenReturn(...); fn BraceReturn(...) @@ -248,19 +280,63 @@ fn BraceReturn(...) 
// Binds to the names `x`, `y`, `z`, `w`: let {.a = var x: bool, - .b = let y: f32, + .b = y: f32, .c = var z: C, .d = ref w: i32} = BraceReturn(...); // Binds to the names `a`, `b`, `c`, `d`: let {var a: bool, - let b: f32, + b: f32, var c: C, ref d: i32} = BraceReturn(...); + +// Above two can be mixed, binding to +// names `a`, `b`, `z`, `w`. +let {var a: bool, + b: f32, + .c = var z: C, + .d = ref w: i32} = BraceReturn(...); ``` -FIXME: We add support for `var`, `let`, and `ref` to patterns in these positions -as well. +Note that we can distinguish between returning a compound form and a tuple or +struct with one token of lookahead. After a `-> let`, `-> var` or `-> ref`, there may +only be a type, not a compound return form. Examples: + +```carbon +// Returns a tuple of type +// `(bool, f32, C, i32)`. +fn TupleReturn(...) -> (bool, f32, C, i32); + +// Invalid, every element must start with +// `->` to be a form. +fn Invalid1(...) -> (bool, ->let f32); + +// Invalid, can only specify return category +// on innermost `->`. +fn Invalid2(...) -> var (-> bool, -> f32); +``` + +To nest, use the default `->` in the outer form, as in: + +```carbon +fn BracesInParens(...) + -> (->{-> .a: bool, ->let .b: f32}, ->var C, ->ref i32); + +let ({.a = var x: bool, .b = let y: f32}, + var c: C, ref d: i32) = BracesInParens(); +// or without renaming: +let ({var a: bool, let b: f32}, + var c: C, ref d: i32) = BracesInParens(); + +fn ParensInBraces(...) + -> {-> .a: bool, + -> .b: (->let f32, ->var C), + ->ref .c: i32}; + +let {a: bool, + .b = (x: f32, var y: C), + ref c: i32} = ParensInBraces(...); +``` ### `bound` parameters @@ -729,14 +805,14 @@ include. It seemed more valuable to support the current `-> var`. That return form allows you to guarantee knowing the address of the object being constructed, and was a good match for `returned var`.
-### `return var` with composite return forms +### `return var` with compound return forms We considered various syntax options on [2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu), but none of them seemed good enough to justify inclusion at this time: ```carbon -fn F(...) -> (ref R, let L, var V) { +fn F(...) -> (->ref R, ->let L, ->var V) { // No longer a `var` being returned. Ideally these // shouldn't have to be initialized together. returned ??? (ref r: R, let l: L, var v: V) = ...? @@ -746,20 +822,20 @@ fn F(...) -> (ref R, let L, var V) { // We could restrict to one `var` return component, // but this is a lot of machinery for a small increase // in expressiveness and applicability. -fn F(...) -> (ref R, let L, var V) { +fn F(...) -> (->ref R, ->let L, ->var V) { returned var v: V = ...; let l: L = ...; return (*r, l, var); } -fn F(...) -> (ref R, let L, var V) { +fn F(...) -> (->ref R, ->let L, ->var V) { // These don't have the right category, and ideally // shouldn't have to be initialized together. returned var ret: (R, L, V) = ...? return var; } -fn F(...) -> (ref R, let L, var V) { +fn F(...) -> (->ref R, ->let L, ->var V) { returned var (_, _, var v: V) = ; } ``` @@ -767,7 +843,7 @@ fn F(...) -> (ref R, let L, var V) { There was another approach we considered for `returned var` originally: ```carbon -fn F(...) -> (ref R, let L, var v1: V1, var v2: V2) { +fn F(...) -> (->ref R, ->let L, ->var v1: V1, ->var v2: V2) { // ... // Must use the same names for `var` parameters return (r, l, v1, v2); @@ -792,11 +868,8 @@ requirements on the type. We could support an "only `var`s" approach in the future if we want: ```carbon -fn F(...) -> (var V1, var V2, var V3) { - // Would have to decide which of these would be allowed: - returned var v: (V1, V2, V3) = ...; +fn F(...) -> (->var V1, ->var V2, ->var V3) { returned var (v1: V1, v2: V2, v3: V3) = ...; - // ... 
return var; } From a71ba59c7725eb58339631114a460251c5ac84e2 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 16 May 2025 20:08:38 +0000 Subject: [PATCH 07/57] Checkpoint progress. --- proposals/p5434.md | 120 ++++++++++++++++++++++++++++++++------------- 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 93d0690e6c378..79ca464bcdcd5 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -16,6 +16,8 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Problem](#problem) - [Background](#background) - [Proposal](#proposal) + - [`ref` bindings](#ref-bindings) + - [`ref`, `let`, and `var` returns](#ref-let-and-var-returns) - [Details](#details) - [Return conventions](#return-conventions) - [Compound return forms and patterns](#compound-return-forms-and-patterns) @@ -98,31 +100,37 @@ won't. ## Background -FIXME: reference expressions in Carbon - -FIXME: `addr self` - -[leads issue #5261: We should add `ref` bindings to to Carbon, paralleling reference expressions](https://github.com/carbon-language/carbon-lang/issues/5261) - -FIXME: -[LLVM `noalias` attribute](https://llvm.org/docs/LangRef.html#function-attributes), -[LLVM pointer aliasing rules](https://llvm.org/docs/LangRef.html#pointer-aliasing-rules) - -FIXME: -[LLVM `nocapture` attribute](https://releases.llvm.org/11.0.0/docs/LangRef.html#parameter-attributes) -has become -[`captures(none)` and `captures(ret: address, provenance)`](https://llvm.org/docs/LangRef.html#function-attributes) -with [pointer capture rules](https://llvm.org/docs/LangRef.html#pointer-capture) - -FIXME: -[`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) - -FIXME: references in C++ +- Carbon has + [reference expressions](/docs/design/values.md#reference-expressions). 
+- Using + [the `addr` keyword on mutating methods to get a `self` with a pointer type](/docs/design/classes.md#methods) + was introduced in + [proposal #722: "Nominal classes and methods"](/proposals/p0722.md#keyword-to-indicate-pass-by-address). +- [Leads issue #5261: "We should add `ref` bindings to Carbon, paralleling reference expressions"](https://github.com/carbon-language/carbon-lang/issues/5261) + supports adding `ref` bindings to Carbon. +- [LLVM's `noalias` attribute](https://llvm.org/docs/LangRef.html#function-attributes) + is used to mark a pointer as being aliased in only limited ways to enable + optimization. Also see + [LLVM's pointer aliasing rules](https://llvm.org/docs/LangRef.html#pointer-aliasing-rules). +- Marking a pointer as not captured, to allow optimizations, was originally + done with + [LLVM's `nocapture` attribute](https://releases.llvm.org/11.0.0/docs/LangRef.html#parameter-attributes), + which has become + [`captures(none)` and `captures(ret: address, provenance)`](https://llvm.org/docs/LangRef.html#function-attributes), + which is governed by + [pointer capture rules](https://llvm.org/docs/LangRef.html#pointer-capture). +- Clang allows C++ code to use the + [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) + to mark parameters that may be referenced by the return value, in order to + detect some classes of use-after-free memory-safety bugs. +- [C++ has reference types](https://en.cppreference.com/w/cpp/language/reference). ## Proposal -The keyword `ref` marks a `:` binding as binding to a reference expression, as -in: +### `ref` bindings + +We introduce a new keyword `ref`. This may be added to a `:` binding to mark it +as binding to a reference expression, as in: ```carbon fn F(ptr: i32*) { @@ -178,16 +186,63 @@ class C { } ``` -Potentially abbreviating the syntax further is left as future work. 
+Potentially abbreviating the syntax further (to allow `ref self` as a short form +of `ref self: Self`) is left as future work. + +The `ref` modifier is allowed on any `:` parameter or any `let` binding in a +function body. A `ref` parameter is passed by address. + +````carbon +fn AddTwoToRef(ref x: i32) { + x += 1; + let ref y: i32 = x; + y += 1; +} + +// Equivalent to: +fn AddTwoToRef(ref x: i32) { + x += 1; + let y_ptr: i32* = &x; + *y_ptr += 1; +} +``` + +The `ref` modifier is forbidden on the bindings in `class` or `struct` fields. + +When calling functions, arguments to non-`self` `ref` parameters are also +marked with `ref`. Continuing the example: + +```carbon +var z: i32 = 3; +AddTwoToRef(ref z); +Assert(z == 5); + +// No `ref` though on the `self` argument. +var c: C = {.x = 4}; +c.NewMethod(); +Assert(c.Get() == 7); +```` + +Normally an argument to a non-`ref` parameter should not be marked `ref`, but it +is allowed in a generic context where the parameter may sometimes be `ref`. + +Operators will mostly not take `ref` parameters, with these exceptions: -The `ref` modifier is allowed on any `:` parameter or `let` binding. It is -forbidden on `class` or `struct` fields. +- [the address-of operator](/docs/design/expressions/pointer_operators.md) + `&`; +- [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; +- [the member access operator](/docs/design/expressions/member_access.md) `.`; + and +- [the assignment operators](/docs/design/assignment.md) such as `=`, `+=`, + and `++`. -FIXME: When calling functions, arguments to non-`self` `ref` parameters are also -marked with `ref`. +In the cases that they do, note that the `ref` parameter will be the `self` +parameter, and so will not be marked with `ref` at the call site. As an _experiment_, we are saying a pointer formed by taking the address of a -`ref` bound name is LLVM-`nocapture` and LLVM-`noalias`. FIXME: finish +`ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`.
FIXME: finish + +### `ref`, `let`, and `var` returns FIXME: The return of a function can optionally be marked `ref`, `let`, or `var`. These control the category of the call expression invoking the function, and how @@ -200,12 +255,9 @@ the return expression is returned. FIXME: Parameters that the return may reference the storage of must be marked `bound`. A `ref` parameter of a function may be referenced by the return value if the `ref` binding is also marked `bound`. Same semantics as -[`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) - -FIXME: Exact structure of how `ref` is attached within patterns w.r.t. -destructuring, etc. - -FIXME: `ref` parameters pass an address/pointer. +[`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8). +The address of a `bound ref` parameter is `captures(ret: address, provenance)` +instead of ``captures(none)` ## Details From 018dda5154a4a68ca21b7e1f308d53e9bfafe756 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 16 May 2025 22:02:47 +0000 Subject: [PATCH 08/57] Checkpoint progress. --- proposals/p5434.md | 202 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 50 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 79ca464bcdcd5..c9801032f9be7 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -19,7 +19,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [`ref` bindings](#ref-bindings) - [`ref`, `let`, and `var` returns](#ref-let-and-var-returns) - [Details](#details) - - [Return conventions](#return-conventions) + - [Initializing returns](#initializing-returns) - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [`bound` parameters](#bound-parameters) - [Details of lifetimes](#details-of-lifetimes) @@ -189,10 +189,11 @@ class C { Potentially abbreviating the syntax further (to allow `ref self` as a short form of `ref self: Self`) is left as future work. 
-The `ref` modifier is allowed on any `:` parameter or any `let` binding in a -function body. A `ref` parameter is passed by address. +The `ref` modifier is allowed on any `:` parameter pattern (as an alternative to +`var`) or any `let` binding in a function body. A `ref` parameter is passed by +address. -````carbon +```carbon fn AddTwoToRef(ref x: i32) { x += 1; let ref y: i32 = x; @@ -207,10 +208,28 @@ fn AddTwoToRef(ref x: i32) { } ``` +We add support for `ref` and `var` in a +[struct pattern](/docs/design/pattern_matching.md#struct-patterns) when using +the shorthand `a: T` syntax for `.a = a: T`: + +```carbon +let {var a: i32, ref b: i32} = ...; + +// Now equivalent to: +let {.a = var a: i32, .b = ref b: i32} = ...; +``` + The `ref` modifier is forbidden on the bindings in `class` or `struct` fields. -When calling functions, arguments to non-`self` `ref` parameters are also -marked with `ref`. Continuing the example: +``` +class Invalid { + // ❌ Invalid. + ref var m: i32; +} +``` + +When calling functions, arguments to non-`self` `ref` parameters are also marked +with `ref`. Continuing the example: ```carbon var z: i32 = 3; @@ -221,7 +240,7 @@ Assert(z == 5); var c: C = {.x = 4}; c.NewMethod(); Assert(c.Get() == 7); -```` +``` Normally an argument to a non-`ref` parameter should not be marked `ref`, but it is allowed in a generic context where the parameter may sometimes be `ref`. @@ -236,55 +255,131 @@ Operators will mostly not take `ref` parameters, with these exceptions: - [the assignment operators](/docs/design/assignment.md) such as `=`, `+=`, and `++`. -In the cases that they do, note that the `ref` parameter will be the `self` +In the cases that they do, observe that the `ref` parameter will be the `self` parameter, and so will not be marked with `ref` at the call site. As an _experiment_, we are saying a pointer formed by taking the address of a -`ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. 
FIXME: finish +`ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. ### `ref`, `let`, and `var` returns -FIXME: The return of a function can optionally be marked `ref`, `let`, or `var`. -These control the category of the call expression invoking the function, and how -the return expression is returned. +The return of a function can optionally be marked `ref`, `let`, or `var`. These +control the category of the call expression invoking the function, and how the +return expression is returned. -- This applies to individual components for functions returning parens or - brace forms. FIXME: need to support returning paren patterns that have some - elements that are `ref` for things like `enumerate`. +```carbon +var global: i32 = 2; +fn ReturnRef() -> ref i32 { + // ❌ Invalid: return 2; -FIXME: Parameters that the return may reference the storage of must be marked -`bound`. A `ref` parameter of a function may be referenced by the return value -if the `ref` binding is also marked `bound`. Same semantics as + // ✅ Valid: return a reference expression with + // sufficient lifetime. + return global; +} +// Call `ReturnRef` and use the resulting reference. +ReturnRef() += 3; +Assert(global == 5); + +// Result of `ReturnRef` can be bound using a `ref` +// binding. +fn AddFive() { + let ref r: i32 = ReturnRef(); + r += 5; +} +AddFive(); +Assert(global == 10); + +fn ReturnLet() -> let i32 { + return 2; +} +// ReturnLet() is a value expression. +let l: i32 = ReturnLet(); + +fn ReturnVar() -> var i32 { + return 2; +} +// ReturnVar() is an initializing expression. +var j: i32 = ReturnVar(); +``` + +- A call to a function declared `-> ref T` is a reference expression. The + function will return the address of a `T` object. +- A call to a function declared `-> let T` is a value expression. The function + will return the value representation of `T`. Since values have no address, + the value representation may be returned in registers. 
+- A call to a function declared `-> var T` is an initializing expression. The + caller provides the address of storage to initialize with an object of type + `T`, that the caller owns upon return. The object will never be returned in + registers. +- A call to a function declared `-> T` is an initializing expression. It will + either match `-> var T` or return by copy, depending on the initializing + representation of `T`. + +A function may have multiple returns, each with their own marker, by using a +paren or brace compound return form. + +```carbon +fn ParenReturn() + -> (-> bool, ->let f32, ->var C, ->ref i32) { + return (true, 1.0, {.x = 3}, global); +} + +fn BraceReturn() + -> {-> .a: bool, + ->let .b: f32, + ->var .c: C, + ->ref .d: i32} { + return {.a = true, .b = 1.0, + .c = {.x = 3}, .d = global}; +} +``` + +If the return of a function may reference the storage of one or more parameters +to the function, those parameters must be marked `bound`. This allows the +compiler to diagnose if the function's return is used after the lifetime of any +`bound` parameter ends. The semantics of `bound` are intended to match the [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8). -The address of a `bound ref` parameter is `captures(ret: address, provenance)` -instead of ``captures(none)` + +```carbon +fn Member(bound ref c: C) -> ref i32 { + return c.x; +} + +// Lifetime of a pointer includes the lifetime +// of what it points to. +fn Deref(bound p: i32*) -> ref i32 { + return *p; +} + +fn Both(bound pc: C*) -> ref i32 { + return pc->x; +} + +fn Invalid() -> ref i32 { + var c: C = {.x = 1}; + // ❌ Error: returning reference bound to `c` + // whose lifetime ends when this function + // returns. + return Member(ref c); +} +``` + +Note that the address of a `bound ref` parameter is +`captures(ret: address, provenance)` instead of `captures(none)`.
## Details -FIXME: Expectation that we would have a goal to use references instead of -pointers when possible. - -### Return conventions - -- `ref`: callee returns address - - call is reference expression -- `var`: caller passes in address to initialize, callee constructs object at - that address - - call is initializing expression - - guarantees that `returned var` is the same variable in the caller and - callee -- `let`: callee returns by value, using the value representation which could - be copy, reference, or custom - - call is a value expression - - implementation is allowed to return in registers, if the value - representation fits, or spill to memory -- default: - - copy in registers if the initializing representation is a copy - - `var` convention otherwise - - call an initializing expression - -FIXME: Default and `var` returns both produce initializing expressions, but are -not the same. +The intent is that we would encourage using references instead of pointers when +possible. Their benefits are related to their limitations, so to get those +benefits we should use them when a use is restricted enough to be within those +limitations. + +### Initializing returns + +The `-> var T` return form guarantees that `returned var` is the same variable +in the caller and callee. In contrast, the `-> T` return form also produces +an initializing expression, but may introduce a copy depending on the +initializing representation. For example: ```carbon fn F() -> var T { @@ -310,8 +405,10 @@ var x: T = H(); ### Compound return forms and patterns Mirroring the paren and brace pattern forms, we also support paren and brace -return forms. FIXME: link to pattern docs. Every element of these forms starts -with `->` and the kind of return (if not default). +return forms. FIXME: link to pattern docs +/docs/design/pattern_matching.md#tuple-patterns and +/docs/design/pattern_matching.md#struct-patterns. 
Every element of these forms +starts with `->` and the kind of return (if not default). FIXME: We add support for `var`, `let`, and `ref` to patterns in these positions as well. @@ -390,6 +487,9 @@ let {a: bool, ref c: i32} = ParensInBraces(...); ``` +FIXME: Example use case: returning paren patterns that have some elements that +are `ref` for things like `enumerate`. + ### `bound` parameters FIXME @@ -473,6 +573,8 @@ FIXME: Like C++, `bound` does not affect semantics or calling conventions, just what code is legal. Helps with the discrepancy between interface and impl functions. +FIXME: conditional determines which `bound` parameter is returned. + FIXME: Can a `var`/`ref`/`let` return be bound to a `var`/`ref`/`let` parameter? Yes in all 9 combinations. Examples: @@ -484,10 +586,10 @@ fn VarToVar(bound var p: i32*) -> var i32* { return p; } ``` For full safety, need a bound variable to be immutably borrowed for the duration -of the lifetime of the returned result. However, goal for now is just matching -`[[clang::lifetimebound]]`, which just has the goal of preventing some classes -of bugs, not full memory safety. We will reconsider this with the memory safety -design. +of the lifetime of the returned result. However, the objective for now is only +matching `[[clang::lifetimebound]]`, which just has the goal of preventing some +classes of bugs, not full memory safety. We will reconsider this with the memory +safety design. ### Details of lifetimes From cbacedeefd555294ef66da65d9e8541e7433c4b9 Mon Sep 17 00:00:00 2001 From: Josh L Date: Sat, 17 May 2025 00:12:53 +0000 Subject: [PATCH 09/57] Checkpoint progress. 
--- proposals/p5434.md | 190 +++++++++++++++++++++++++++------------------ 1 file changed, 115 insertions(+), 75 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index c9801032f9be7..a7f43b9a2cacf 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -355,7 +355,15 @@ fn Both(bound pc: C*) -> ref i32 { return p->x; } -fn Invalid() -> ref i32{ +fn Invalid1() -> ref i32 { + var x: i32 = 4; + // ❌ Error: returning reference to `x` + // whose lifetime ends when this function + // returns. + return x; +} + +fn Invalid2() -> ref i32 { var c: C = {.x = 1}; // ❌ Error: returning reference bound to `c` // whose lifetime ends when this function @@ -364,8 +372,8 @@ fn Invalid() -> ref i32{ } ``` -Note that the address of a `bound ref` parameter is -`captures(ret: address, provenance)` instead of `captures(none)`. +The address of a `bound ref` parameter is `captures(ret: address, provenance)` +instead of `captures(none)`. ## Details @@ -404,14 +412,10 @@ var x: T = H(); ### Compound return forms and patterns -Mirroring the paren and brace pattern forms, we also support paren and brace -return forms. FIXME: link to pattern docs -/docs/design/pattern_matching.md#tuple-patterns and -/docs/design/pattern_matching.md#struct-patterns. Every element of these forms -starts with `->` and the kind of return (if not default). - -FIXME: We add support for `var`, `let`, and `ref` to patterns in these positions -as well. +Mirroring the [paren](/docs/design/pattern_matching.md#tuple-patterns) and +[brace](/docs/design/pattern_matching.md#struct-patterns) pattern forms, we also +support paren and brace return forms. Every element of these forms starts with +`->` and the kind of return (if not default). ```carbon // Paren return form @@ -448,19 +452,19 @@ let {var a: bool, ``` Note that we can distinguish between returning a compound form and a tuple or -struct with one token of lookahead. 
After a `-> let`, `-> var` or `-> ref`, may -only have a type, not a compound return form. Examples: +struct with one token of lookahead. Only types are allowed after a `-> let`, +`-> var` or `-> ref`, not a compound return form. Examples: ```carbon // Returns a tuple of type // `(bool, f32, C, i32)`. fn TupleReturn(...) -> (bool, f32, C, i32); -// Invalid, every element must start with +// ❌ Invalid, every element must start with // `->` to be a form. fn Invalid1(...) -> (bool, ->let f32); -// Invalid, can only specify return category +// ❌ Invalid, can only specify return category // on innermost `->`. fn Invalid2(...) -> var (-> bool, -> f32); ``` @@ -487,96 +491,132 @@ let {a: bool, ref c: i32} = ParensInBraces(...); ``` -FIXME: Example use case: returning paren patterns that have some elements that -are `ref` for things like `enumerate`. +This feature is intended to support cases like `enumerate` that will want to +return a value for the index but a reference to the element of the sequence +being enumerated. ### `bound` parameters -FIXME +It is an error to return a reference to a temporary that won't live once the +function returns, even if it is a parameter marked `bound`. ```carbon -// `bound` works like [[clang::lifetimebound]] -// https://clang.llvm.org/docs/AttributeReference.html#id8 +fn Invalid1() -> i32* { + var x: i32 = 4; + // ❌ Invalid. + return &x; +} -fn F(bound x: i32) -> ref i32 { - var y: i32 = x; - // reject - return y; +fn Invalid2(bound x: i32) -> ref i32 { + var y: i32 = x; + // ❌ Invalid. + return y; } -fn G(bound p: i32*) -> ref i32 { - // accept - return *p; +// ✅ Valid +fn Valid1(bound p: i32*) -> ref i32 { + return *p; } -fn H(bound var x: i32) -> ref i32 { - // reject - return x; +fn Invalid3(bound var x: i32) -> ref i32 { + // ❌ Invalid: lifetime of `var` parameter + // ends when function returns. 
+ return x; } -class C { - // accept - fn CF[bound ref self: Self]() -> ref i32 { return self.m; } +class ReturnMember { + // ✅ Valid + fn ValidRef[bound ref self: Self]() -> ref i32 { + return self.m; + } - // reject - fn CG[bound self: Self]() -> ref i32 { return self.m; } + // ❌ Invalid: can't return reference to value. + fn InvalidLet[bound self: Self]() -> ref i32 { + return self.m; + } - // reject - fn CH[bound var self: Self]() -> ref i32 { return self.m; } + // ❌ Invalid: `var self` lifetime ends. + fn InvalidVar[bound var self: Self]() + -> ref i32 { return self.m; } - var m: i32; + var m: i32; } -// Would be marked "pointer" if we decide to support that -class D { - // accept - fn DF[bound ref self: Self]() -> ref i32 { return *self.pm; } +class DerefPointerMember { + // ✅ Valid + fn ValidRef[bound ref self: Self]() -> ref i32 { + return *self.pm; + } - // accept - fn DG[bound self: Self]() -> ref i32 { return *self.pm; } + // ✅ Valid + fn ValidLet[bound self: Self]() -> ref i32 { + return *self.pm; + } - // accept - fn DH[bound var self: Self]() -> ref i32 { return *self.pm; } + // ✅ Valid + fn ValidVar[bound var self: Self]() + -> ref i32 { return *self.pm; } - var pm: i32*; + var pm: i32*; } +``` -class E { - fn Make() -> E { - return var ret: E; - ret.m = 0; - ret.pm = &ret.m; - return var; - } - // accept - fn EF[bound ref self: Self]() -> ref i32 { return *self.pm; } +The `bound` parameters must include everything referenced by the return, but +need not be referenced, particularly not on every code path. - // could reject if we had an annotation saying E is not pointer - fn EG[bound self: Self]() -> ref i32 { return *self.pm; } +```carbon +// Result references `r` if `b` is true, and `p` +// otherwise. Valid as long as both are marked `bound`. 
+fn Conditional(b: bool, bound ref r: C, bound p: C*) + -> ref C { + if (b) { + return r; + } else { + return *p; + } +} +``` - // could reject if we had an annotation saying E is not pointer - fn EH[bound var self: Self]() -> ref i32 { return *self.pm; } +The parameters of functions defined in an interface may also be marked as +`bound`. The `impl` of that interface for a type can only have `bound` in a +subset of the positions present in the interface. - var pm: i32*; - var m: i32; +```carbon +interface I { + fn F[bound ref self: Self] + (a: Self, bound b: Self, bound c: Self*) + -> ref Self; } -``` -FIXME: If a temporary argument is bound to a return that outlives it, it is an -error. +impl C1 as I { + // ✅ Valid: matches interface + fn F[bound ref self: Self] + (a: Self, bound b: Self, bound c: Self*) + -> ref Self; +} -FIXME: Can write `bound` the parameters of a function member of an interface. -The `impl` of that interface for a type can only have `bound` in a subset of the -positions present in the interface. +impl C2 as I { + // ✅ Valid: proper subset of `bound` params + fn F[ref self: Self] + (a: Self, bound b: Self, c: Self*) + -> ref Self; +} -FIXME: Like C++, `bound` does not affect semantics or calling conventions, just -what code is legal. Helps with the discrepancy between interface and impl -functions. +impl C2 as I { + // ❌ Invalid: `a` is not bound in `I.F`. + fn F[ref self: Self] + (bound a: Self, b: Self, c: Self*) + -> ref Self; +} +``` -FIXME: conditional determines which `bound` parameter is returned. +Like `[[clang::lifetimebound]]` in C++, `bound` does not affect semantics or +calling conventions, just what code is legal. This helps avoid mismatches +between typechecking against the signatures in an interface when the impl +functions are different. -FIXME: Can a `var`/`ref`/`let` return be bound to a `var`/`ref`/`let` parameter? -Yes in all 9 combinations. 
Examples: +Note that all combinations of a `var`/`ref`/`let` return can be bound to a +`var`/`ref`/`let` parameter. Examples: ```carbon fn RefToLet(bound ref x: C) -> let D { return x.d; } @@ -587,7 +627,7 @@ fn VarToVar(bound var p: i32*) -> var i32* { return p; } For full safety, need a bound variable to be immutably borrowed for the duration of the lifetime of the returned result. However, the objective for now is only -matching `[[clang::lifetimebound]]`, which just has the goal of preventing some +matching `[[clang::lifetimebound]]`, which has the goal of preventing some classes of bugs, not full memory safety. We will reconsider this with the memory safety design. From 2d6a7a3bd2db1d33f76ec0276432ddfc838bc08e Mon Sep 17 00:00:00 2001 From: Josh L Date: Sat, 17 May 2025 03:50:00 +0000 Subject: [PATCH 10/57] Checkpoint progress. --- proposals/p5434.md | 289 +++++++++++++++++++++++++++++++++------------ 1 file changed, 213 insertions(+), 76 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index a7f43b9a2cacf..f39a3b952734a 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -22,7 +22,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Initializing returns](#initializing-returns) - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [`bound` parameters](#bound-parameters) - - [Details of lifetimes](#details-of-lifetimes) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) - [Part of the expression type system, not object types](#part-of-the-expression-type-system-not-object-types) @@ -30,13 +29,11 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Use case: `Deref` interface](#use-case-deref-interface) - [Use case: indexing interfaces](#use-case-indexing-interfaces) - [Use case: class accessors](#use-case-class-accessors) - - [Use case: `OptionalRef`](#use-case-optionalref) - [Future 
work](#future-work) - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) - [Interaction with effects](#interaction-with-effects) - [More precise lifetimes](#more-precise-lifetimes) - - [Combining with compile-time bindings](#combining-with-compile-time-bindings) - - [Tuple parameters with `ref` elements](#tuple-parameters-with-ref-elements) + - [Combining with compile-time](#combining-with-compile-time) - [Interaction with `Call` or other interfaces](#interaction-with-call-or-other-interfaces) - [Rationale](#rationale) - [Alternatives considered](#alternatives-considered) @@ -46,7 +43,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [No call-site annotation](#no-call-site-annotation) - [Top-level `ref` introducer](#top-level-ref-introducer) - [`ref` as a type qualifier](#ref-as-a-type-qualifier) - - [Temporary lifetime extension](#temporary-lifetime-extension) + - [`bound` causes temporary lifetime extension](#bound-causes-temporary-lifetime-extension) - [`bound` would change the default return to `let`](#bound-would-change-the-default-return-to-let) - [Other return conventions](#other-return-conventions) - [`return var` with compound return forms](#return-var-with-compound-return-forms) @@ -157,6 +154,28 @@ expression. The address of a `ref` bound name gives the address of the bound object, so `&x == ptr` above. The reference itself does not have an address, and unlike a pointer can't be rebound to reference a different object. +We ensure that reference expressions formed by way of reference bindings _do not +dangle_. For example, temporaries bound to a reference have their +[lifetime extended, as is done in C++](https://en.cppreference.com/w/cpp/language/reference_initialization#Lifetime_of_a_temporary): + +```carbon +fn ReturnsInt() -> i32; +fn ExtendTempLifetime() -> i32 { + // `ReturnsInt()` returns a temporary whose + // lifetime is extended to match `r`. 
+ let ref r: i32 = ReturnsInt(); + r += 1; + return r + 2; + // Lifetime of `r` and temporary ends here. +} +``` + +FIXME: QUESTION: Are there use cases where we really need temporary lifetime +extension? Was asked for in +[leads issue #5261: "We should add `ref` bindings to Carbon, paralleling reference expressions"](https://github.com/carbon-language/carbon-lang/issues/5261). +Seems like `TakesARefParam(ReturnsATemporary())` is likely to be a bug, and +`let ref ... = ReturnsATemporary();` could be changed to `var` instead. + We remove `addr`, and use instead use `ref` for the `self` parameter when an object is required. Note that the type will change from `Self*` to `Self` in this case. @@ -190,8 +209,7 @@ Potentially abbreviating the syntax further (to allow `ref self` as a short form of `ref self: Self`) is left as future work. The `ref` modifier is allowed on any `:` parameter pattern (as an alternative to -`var`) or any `let` binding in a function body. A `ref` parameter is passed by -address. +`var`) or any `let` binding in a function body. ```carbon fn AddTwoToRef(ref x: i32) { @@ -259,7 +277,13 @@ In the cases that they do, observe that the `ref` parameter will be the `self` parameter, and so will not be marked with `ref` at the call site. As an _experiment_, we are saying a pointer formed by taking the address of a -`ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. +`ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. This means that +while a `ref` parameter could be passed into a function by address, the +restrictions are intended to also allow a "move-in-move-out" approach (once we +define the move operation), assuming it is not +[marked `bound`](#bound-parameters). The intent here is to leave the door open +to a calling convention using registers and less indirection for small-enough +objects. 
### `ref`, `let`, and `var` returns @@ -613,7 +637,8 @@ impl C2 as I { Like `[[clang::lifetimebound]]` in C++, `bound` does not affect semantics or calling conventions, just what code is legal. This helps avoid mismatches between typechecking against the signatures in an interface when the impl -functions are different. +functions are different. In particular, passing a temporary to a `bound` +parameter is an error, instead of having its lifetime extended. Note that all combinations of a `var`/`ref`/`let` return can be bound to a `var`/`ref`/`let` parameter. Examples: @@ -631,33 +656,10 @@ matching `[[clang::lifetimebound]]`, which has the goal of preventing some classes of bugs, not full memory safety. We will reconsider this with the memory safety design. -### Details of lifetimes - -We should ensure that reference expressions formed by way of reference bindings -_do not dangle_. - -So for any reference expression that has a known lifetime already in the -language, such as those associated with temporaries or `var` declarations, we -should either lifetime-extend (in the case of temporaries) or error (in the case -of declarations) when trying to form a binding that would outlive the referenced -object. - -For reference expressions without known lifetimes currently such as dereferenced -pointers, while we should allow them despite unsafety today, we should fully -expect lifetime safety in Carbon to eventually introduce a way of reasoning -about these lifetimes and with that a requirement that the lifetime of the -binding be satisfied. That should be explicitly expected as future work and part -of getting an overall safety story for Carbon. - -This does fundamentally mean that we now have another kind of "pointer", -potentially adding complexity to any memory-safety story. However, I think this -ship already sailed to some extent with value bindings. 
Fundamentally, bindings -are allowed to have pointer-like semantics from a lifetime perspective, and so -will need to be considered as a pointer-like thing as we build out lifetime -safety. - ### How addresses interact with `ref` +FIXME + The suggested model is that `ref` bindings mirror reference expressions in that they refer back to some underlying object. As a consequence, it should be possible to take the address of a `ref` binding and get the address of that @@ -747,24 +749,20 @@ but also a deeper level because it will make it significantly harder to see the parallel usage across the boundary between C++ and Carbon. With reference bindings, the vast majority of this dissonance will be removed. -FIXME: Concern from -[open discussion on 2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) -is that the `nocapture` and `noalias` modifiers don't match C++ restrictions, +This does create a migration concern, raised in +[open discussion on 2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc), +that the `nocapture` and `noalias` modifiers don't match C++ restrictions, particularly on the `this` parameter that we are going to require migrate to `ref self`. ### Part of the expression type system, not object types -Much like value bindings, a `ref` binding is not - -These will ultimately be part of the type system, but the goal is for them to -only be part of the type system through patterns used in the type system: -function parameters, etc. - -Specifically, we don't expect them to be part of the _object types_ in Carbon, -but only part of the expression categories and bindings within patterns. In this -regard, they are very similar to _value_ bindings -- we retain a great deal of -implementation flexibility around layout, etc. 
+Much like value/`let` and `var` bindings, `ref` binding and the new return forms +are are part of the type system, but only through expression categories, +patterns (function parameters and so on), and returns. Specifically, we don't +expect them to be part of the _object types_ in Carbon. Like value bindings, we +retain a great deal of implementation flexibility around layout, and the +specifics of how they are lowered. This specifically means we will need to incorporate `ref` bindings into the `Call` interface and we will be adding complexity there that will need to be @@ -797,46 +795,167 @@ fn F(...) -> {->var .a : T} { } ``` -Can revisit later if motivated. +We can revisit and expand this later if this does not handle use cases we would +like to support. ### Use case: `Deref` interface -FIXME +FIXME: Support customization of the prefix-`*` dereferencing operator. + +```carbon +interface Deref { + let Result:! type; + fn Op[bound ref self: Self]() -> ref Result; +} + +final impl forall [T:! type] T* as Deref { + where Result = T; + fn Op[bound ref self: Self]() -> ref T + = "builtin.deref"; +} +``` + +Then `*p` is rewritten to `p.(Deref.Op)()`. ### Use case: indexing interfaces FIXME -### Use case: class accessors +Before: -FIXME +```carbon +interface IndexWith(SubscriptType:! type) { + let ElementType:! type; + fn At[self: Self](subscript: SubscriptType) -> ElementType; + fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType*; +} + +interface IndirectIndexWith(SubscriptType:! type) { + require Self impls IndexWith(SubscriptType); + fn Addr[self: Self](subscript: SubscriptType) -> ElementType*; +} -### Use case: `OptionalRef` +final impl forall + [SubscriptType:! type, T:! IndirectIndexWith(SubscriptType)] + T as IndexWith(SubscriptType) { + let ElementType:! 
type = T.(IndirectIndexWith(SubscriptType).ElementType); + fn At[self: Self](subscript: SubscriptType) -> ElementType { + return *(self.(IndirectIndexWith(SubscriptType).Addr)(index)); + } + fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType* { + return self->(IndirectIndexWith(SubscriptType).Addr)(index); + } +} +``` + +After: + +```carbon +interface IndexWith(SubscriptType:! type) { + let ElementType:! type; + fn At[self: Self](subscript: SubscriptType) -> ElementType; + fn Addr[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; +} + +interface IndirectIndexWith(SubscriptType:! type) { + require Self impls IndexWith(SubscriptType); + fn Addr[bound self: Self](subscript: SubscriptType) -> ref ElementType; +} + +final impl forall + [SubscriptType:! type, T:! IndirectIndexWith(SubscriptType)] + T as IndexWith(SubscriptType) { + let ElementType:! type = T.(IndirectIndexWith(SubscriptType).ElementType); + fn At[self: Self](subscript: SubscriptType) -> ElementType { + return self.(IndirectIndexWith(SubscriptType).Addr)(index); + } + fn Addr[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { + return self.(IndirectIndexWith(SubscriptType).Addr)(index); + } +} +``` + +`lhs[index]` with `lhs` of type `T` and `index` of type `I` will be rewritten to +one of: + +- Before: `*(lhs.(IndirectIndexWith(I).Addr)(index))` -> After: + `lhs.(IndirectIndexWith(I).Addr)(index)` +- Before: `*(lhs.(IndexWith(I).Addr)(index))` -> After: + `lhs.(IndexWith(I).Addr)(index)` +- Unchanged: `lhs.(IndexWith(I).At)(index)` + +depending on whether `T` is known to implement `IndirectIndexWith(I)` and +whether `lhs` is a +[durable reference expression](/docs/design/values.md#durable-reference-expressions). + +### Use case: class accessors FIXME +```carbon +class HasMember { + // Eventually these functions would be + // together in an overload set. 
+ fn Get[self: Self]() -> i32 { return self.m; } + fn GetMut[bound ref self: Self]() -> i32 { + return self.m; + } + var m: i32; +} +``` + ## Future work ### `ref` bindings in lambdas -FIXME: might be a reason to support `ref` bindings as fields, with all the -restrictions that comes with that. +We have already identified +[future work to support reference captures in lambdas as part of proposal #3848](/proposals/p3848.md#future-work-reference-captures). +This might be a reason to support `ref` bindings as fields of objects, with all +the restrictions that come with that. ### Interaction with effects -FIXME: `Optional`, errors/`Result`, co-routines, async +We still need to determine how references and the other return types interact +with effects, like `Optional`, errors, co-routines, and so on. For example, we +don't want to give up the benefits of being able to directly return a reference +when a function has an error path. -### More precise lifetimes +It is unclear if this will mean putting references into the object type system, +but we may be able to handle this with additional types or the ability to +customize return representations. For example, we might have an alternate +version of the `Optional` type that holds a reference: -FIXME +```carbon +class OptionalRef(T:! type) { + fn Make(bound ref r: T) -> Self { + return {.p = &r}; + } + fn MakeEmpty() -> Self { + return {.p = Optional(T*).MakeEmpty()}; + } + fn HasValue[self: Self]() -> bool { + return p.HasValue(); + } + fn Get[bound ref self: Self]() -> ref Result { + Assert(self.HasValue()); + return *self.p.Get(); + } -### Combining with compile-time bindings + private var p: Optional(T*); +} +``` -FIXME +### More precise lifetimes -### Tuple parameters with `ref` elements +More precise lifetime tracking will be considered with the memory safety design. 
+For example, the `bound` approach does not distinguish different components of a +compound return, or different parts of a parameter object that might have +different lifetimes. -FIXME +### Combining with compile-time + +FIXME: We plan to support references to compile-time state when executing a +function at compile time. That will be part of a future proposal. ### Interaction with `Call` or other interfaces @@ -848,28 +967,30 @@ being generic across these aspects of bindings. ## Rationale -FIXME: How does this proposal effectively advance Carbon's goals? Rather than -re-stating the full motivation, this should connect that motivation back to -Carbon's stated goals and principles. This may evolve during review. Use links -to appropriate sections of [`/docs/project/goals.md`](/docs/project/goals.md), -and/or to documents in [`/docs/project/principles`](/docs/project/principles). -For example: +This proposal tries to advance these [Carbon goals](/docs/project/goals.md): -- [Community and culture](/docs/project/goals.md#community-and-culture) -- [Language tools and ecosystem](/docs/project/goals.md#language-tools-and-ecosystem) - [Performance-critical software](/docs/project/goals.md#performance-critical-software) -- [Software and language evolution](/docs/project/goals.md#software-and-language-evolution) + - Having a "move-in-move-out" option as a calling convention is a + potential performance improvement for using `ref` parameters instead of + pointers. + - Giving additional options for the return convention gives opportunities + for improved performance. Having this set by explicit return markings is + about giving control and predictability to the code author. - [Code that is easy to read, understand, and write](/docs/project/goals.md#code-that-is-easy-to-read-understand-and-write) + - `ref` bindings and returns avoid the ceremony of round-tripping through + pointers. 
- [Practical safety and testing mechanisms](/docs/project/goals.md#practical-safety-and-testing-mechanisms) -- [Fast and scalable development](/docs/project/goals.md#fast-and-scalable-development) -- [Modern OS platforms, hardware architectures, and environments](/docs/project/goals.md#modern-os-platforms-hardware-architectures-and-environments) + - Checking that reference bindings are not dangling is important for + avoiding use-after-free bugs. + - `bound` markings on parameters to allow safety equivalent to Clang's + `[[clang::lifetimebound]]` when returning a reference. - [Interoperability with and migration from existing C++ code](/docs/project/goals.md#interoperability-with-and-migration-from-existing-c-code) + - Including references in Carbon allows for less mismatch for C++ code + using references. ## Alternatives considered -FIXME: What alternative solutions have you considered? - -Discussed in open discussion on: +These ideas were discussed in open discussion on: - [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) - [2025-05-06](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.s42g5iv67d3c) @@ -881,6 +1002,11 @@ Discussed in open discussion on: - [2025-05-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.bdznj2d0by2g) - [2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343) +They were also discussed in the +[#pointers-and-references channel in Discord starting 2025-05-05](https://discord.com/channels/655572317891461132/753021843459538996/1369085231038074901), +and +[#syntax on 2025-05-14](https://discord.com/channels/655572317891461132/709488742942900284/1372285365162872943). 
+ ### No `ref`, only pointers - [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) @@ -914,6 +1040,13 @@ possible. Counter: good to see indirections when they are present +This does fundamentally mean that we now have another kind of "pointer", +potentially adding complexity to any memory-safety story. However, I think this +ship already sailed to some extent with value bindings. Fundamentally, bindings +are allowed to have pointer-like semantics from a lifetime perspective, and so +will need to be considered as a pointer-like thing as we build out lifetime +safety. + ### Remove pointers after adding references Need to have something rebindable in an assignable object. The viable path @@ -944,6 +1077,10 @@ enforced by the compiler. Swift `inout` parameters are marked at caller with an `&` before the argument https://docs.swift.org/swift-book/documentation/the-swift-programming-language/functions/#In-Out-Parameters +And Jordan Rose has published +[a regret](https://belkadan.com/blog/2021/12/Swift-Regret-inout-Syntax/) that +they didn't use `inout` to mark the argument instead of `&`. + On the other hand, not marking is not known to be a source of bugs. This is a "try it and see how well it works" sort of decision. @@ -957,7 +1094,7 @@ write, unlike what we do for `var. FIXME: big concern is effects, but also representing argument lists with tuples -### Temporary lifetime extension +### `bound` causes temporary lifetime extension FIXME: No. Neither Rust nor C++ does this in these cases. From 4a87f20b921ad6e5749a4cb6b41eeb3967435296 Mon Sep 17 00:00:00 2001 From: Josh L Date: Sun, 18 May 2025 01:06:54 +0000 Subject: [PATCH 11/57] Checkpoint progress. 
--- proposals/p5434.md | 112 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index f39a3b952734a..81fa16e0a1e75 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -687,9 +687,37 @@ an open question that can likely also be an area for future work. FIXME: also `noalias` see https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2843057728 +Do we allow shared mutable `ref`s to the same object? How does that interact +with the ability to support move-in-move-out? For example: + +```carbon +// Suppose i32 has move-in-move-out semantics for ref parameters. +fn F(ref a: i32, ref b: i32) -> bool { + // Can this affect the value of b? + a = 1; + // Can this return true? + return &a == &b; +} +fn G() -> bool { + var v: i32; + return F(v, v); +} +``` + +It seems like the restriction we would want on `ref` parameters to support +move-in-move-out in cases like the above, and still have `ref` behave like a +reference, would be something similar to `noalias` / a "no shared references" +rule. It could well be that that's exactly what we want, but I think it's +important that we're explicit about what choice we're making here. + +If we can't make the difference between pass-by-pointer (alias) and +move-in-move-out (borrow) unobservable for `ref`, for example by adding +restrictions that prohibit observing the difference, I think we should consider +having both `ref` and `inout` so that the programmer can state their intent. + FIXME: -- `noalias` means like C restrict; you can't observe mutations through +- `noalias` means like C `restrict`; you can't observe mutations through aliases; mutation through a restricted pointer is not observable through another pointer - `captures(none)` means no transitive escape, can pass a nocapture pointer to @@ -800,7 +828,8 @@ like to support. 
### Use case: `Deref` interface -FIXME: Support customization of the prefix-`*` dereferencing operator. +To support customization of the prefix-`*` dereferencing operator, we introduce +the `Deref` interface. ```carbon interface Deref { @@ -815,7 +844,21 @@ final impl forall [T:! type] T* as Deref { } ``` -Then `*p` is rewritten to `p.(Deref.Op)()`. +Then `*p` is rewritten to `p.(Deref.Op)()`, and `p->m` is rewritten to +`p.(Deref.Op)().m`. For example, this might be used by a smart pointer: + +```carbon +class SmartPtr(T:! type) { + fn Make(p: T*) -> Self { return {.ptr = p}; } + impl as Deref { + where Result = T; + fn Op[bound ref self: Self]() -> ref Result { + return *self.ptr; + } + } + private var ptr: T*; +} +``` ### Use case: indexing interfaces @@ -890,17 +933,66 @@ whether `lhs` is a ### Use case: class accessors -FIXME +A `ref` return can be used to expose the state of an object in a way that can be +mutated: + +```carbon +class Four { + fn Get[self: Self](i: i32) -> i32 { + Assert(i >= 0 and i < 4); + return self.m[i]; + } + fn GetMut[bound ref self: Self](i: i32) -> ref i32 { + Assert(i >= 0 and i < 4); + return self.m[i]; + } + private var m: array(i32, 4); +} + +var x: Four = {.m = (0, 2, 4, 6)}; +x.GetMut(2) += 1; +fn Check(y: Four) { + Assert(y.Get(2) == 5); +} +Check(x); +``` + +**Future work**: this will in the future often be done with an overloaded +method, as in: + +```carbon +class Four { + overload Access { + fn [bound ref self: Self](i: i32) -> ref i32 { + Assert(i >= 0 and i < 4); + return self.m[i]; + } + fn [self: Self](i: i32) -> i32 { + Assert(i >= 0 and i < 4); + return self.m[i]; + } + } + private var m: array(i32, 4); +} + +var x: Four = {.m = (0, 2, 4, 6)}; +x.Access(2) += 1; +fn Check(y: Four) { + Assert(y.Access(2) == 5); +} +Check(x); +``` + +This may be a common enough use case that we will want to introduce a dedicated +syntax: ```carbon class HasMember { - // Eventually these functions would be - // together in an
overload set. - fn Get[self: Self]() -> i32 { return self.m; } - fn GetMut[bound ref self: Self]() -> i32 { - return self.m; + fn Access[bound ref? self: Self](i: i32) -> ref? i32 { + Assert(i >= 0 and i < 4); + return self.m[i]; } - var m: i32; + private var m: array(i32, 4); } ``` From 90d250e4350eaa6e81740f9dad7b520b68001a68 Mon Sep 17 00:00:00 2001 From: Josh L Date: Sun, 18 May 2025 01:33:29 +0000 Subject: [PATCH 12/57] Checkpoint progress. --- proposals/p5434.md | 66 +++++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 24 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 81fa16e0a1e75..b9d03a76d1823 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -39,7 +39,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Alternatives considered](#alternatives-considered) - [No `ref`, only pointers](#no-ref-only-pointers) - [Remove pointers after adding references](#remove-pointers-after-adding-references) - - [All `ref` bindings in the fields of classes](#all-ref-bindings-in-the-fields-of-classes) + - [Allow `ref` bindings in the fields of classes](#allow-ref-bindings-in-the-fields-of-classes) - [No call-site annotation](#no-call-site-annotation) - [Top-level `ref` introducer](#top-level-ref-introducer) - [`ref` as a type qualifier](#ref-as-a-type-qualifier) @@ -1046,8 +1046,8 @@ different lifetimes. ### Combining with compile-time -FIXME: We plan to support references to compile-time state when executing a -function at compile time. That will be part of a future proposal. +We plan to support references to compile-time state when executing a function at +compile time. That will be part of a future proposal. 
### Interaction with `Call` or other interfaces @@ -1101,6 +1101,8 @@ and ### No `ref`, only pointers +FIXME + - [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) Reasons to add references: @@ -1141,6 +1143,8 @@ safety. ### Remove pointers after adding references +FIXME + Need to have something rebindable in an assignable object. The viable path forward without separate pointers and references is to have something rebindable like pointers but automatically dereferenced like references, which is the @@ -1149,12 +1153,12 @@ approach Rust takes. One of the features of a reference is what it cannot do, so can't build a pointer out of references. -### All `ref` bindings in the fields of classes +### Allow `ref` bindings in the fields of classes FIXME Did not see a reason to put references into objects, so keeping references out -of types. +of types. This could change to support lambda reference captures. ### No call-site annotation @@ -1188,28 +1192,41 @@ FIXME: big concern is effects, but also representing argument lists with tuples ### `bound` causes temporary lifetime extension -FIXME: No. Neither Rust nor C++ does this in these cases. +Neither Rust nor C++ will extend the lifetime of a temporary passed to a +function, which suggests doing so is not needed. ### `bound` would change the default return to `let` +We considered saying that `bound` would change the default return to use the +`->let` return convention. This was discussed on [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) and -[2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) +[2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5). 
+The idea is that `let` is expected to be efficient, so we should encourage using +it, but we can't always use `let`, since some types have a reference value +representation, but `bound` alleviates that concern. -FIXME: Before we realized that `bound` is relevant for all return conventions. -Changing defaults is action at a distance, creating unexpected changes for -readers of the code. Don't want the return category to a function in an -interface using `bound`, and the same function in the `impl` of that `interface` -not. Lifetimes in Rust and Clang's `lifetimebound` don't change calling -conventions, only what code is valid. +Once we realized that `bound` is relevant for all return conventions, we +reconsidered that approach, since it has a number of concerns: + +- Changing defaults is action at a distance, changing the behavior without + changing the code in the relevant location. +- We don't want to have to change the return category of a function that is + copy-pasted from an interface to an `impl` of it when removing `bound`. +- Lifetimes in Rust and Clang's `[[clang::lifetimebound]]` don't change + calling conventions, only what code is valid. + +Going with an approach where less depends on `bound` makes sense for now, since +we are going to reconsider these issues as part of our upcoming memory safety +work.
### Other return conventions +We also considered other conventions for returning from functions, on [2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) and -[2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu) - -We also considered other conventions for returning from functions, most notably: +[2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu), +most notably: - **var without storage**: callee returns pointer to the storage of a subobject of a `bound var` parameter, that caller is then responsible for @@ -1223,10 +1240,11 @@ There were also some variations on what the conditions for returning in registers using the default return convention. We considered "var without storage" the longest, but the fact that it couldn't -reliably used to initialize a variable meant it did not seem valuable enough to -include. It seemed more valuable to support the current `-> var`. That return -form allows you to guarantee knowing the address of the object being -constructed, and was a good match for `returned var`. +reliably be used to initialize a variable, particularly in the middle of an object, +meant it did not seem valuable enough to include. It seemed more valuable to +support the current `-> var`. That return form allows you to guarantee knowing +the address of the object being constructed, and was a good match for +`returned var`. ### `return var` with compound return forms @@ -1300,13 +1318,13 @@ fn F(...) 
-> (->var V1, ->var V2, ->var V3) { ### Other syntax for compound return forms +We considered other options for the syntax of compound return forms on [2025-05-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.bdznj2d0by2g), [#syntax in Discord on 2025-05-14](https://discord.com/channels/655572317891461132/709488742942900284/1372285365162872943), and -[2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343) - -Omitting the `->` in each component did not distinguish tuples from paren return -forms sufficiently: +[2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343). +The option of omitting the `->` in each component did not distinguish tuples +from paren return forms sufficiently: ```carbon -> (ref i32, var i32) From 8151bed701d6a71a39bb296db761cdbed2011811 Mon Sep 17 00:00:00 2001 From: Josh L Date: Sun, 18 May 2025 15:45:16 +0000 Subject: [PATCH 13/57] Checkpoint progress. --- proposals/p5434.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index b9d03a76d1823..3ff88fc6b6a46 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -21,6 +21,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Details](#details) - [Initializing returns](#initializing-returns) - [Compound return forms and patterns](#compound-return-forms-and-patterns) + - [Resolution of #5250](#resolution-of-5250) - [`bound` parameters](#bound-parameters) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) @@ -335,9 +336,9 @@ var j: i32 = ReturnVar(); caller provides the address of storage to initialize with an object of type `T`, that the caller owns upon return. The object will never be returned in registers. 
-- A call to a function declared `-> T` is an initializing expression. It will - either match `-> var T` or return by copy, depending on the initializing - representation of `T`. +- A call to a function declared `-> T` is unchanged. It is an initializing + expression. It will either match `-> var T` or return by copy, depending on + the initializing representation of `T`. A function may have multiple returns, each with their own marker, by using a paren or brace compound return form. @@ -519,6 +520,11 @@ This feature is intended to support cases like `enumerate` that will want to return a value for the index but a reference to the element of the sequence being enumerated. +#### Resolution of #5250 + +FIXME: +[nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250) + ### `bound` parameters It is an error to return a reference to a temporary that won't live once the From 7172159d291fa34315a01ca160ec0eda174917e7 Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 19 May 2025 21:23:50 +0000 Subject: [PATCH 14/57] Checkpoint progress. 
--- proposals/p5434.md | 82 ++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 32 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 3ff88fc6b6a46..480505f1aec5c 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -36,6 +36,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [More precise lifetimes](#more-precise-lifetimes) - [Combining with compile-time](#combining-with-compile-time) - [Interaction with `Call` or other interfaces](#interaction-with-call-or-other-interfaces) + - [Destructuring assignment](#destructuring-assignment) - [Rationale](#rationale) - [Alternatives considered](#alternatives-considered) - [No `ref`, only pointers](#no-ref-only-pointers) @@ -45,6 +46,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Top-level `ref` introducer](#top-level-ref-introducer) - [`ref` as a type qualifier](#ref-as-a-type-qualifier) - [`bound` causes temporary lifetime extension](#bound-causes-temporary-lifetime-extension) + - [Reference bindings to temporaries](#reference-bindings-to-temporaries) - [`bound` would change the default return to `let`](#bound-would-change-the-default-return-to-let) - [Other return conventions](#other-return-conventions) - [`return var` with compound return forms](#return-var-with-compound-return-forms) @@ -150,32 +152,16 @@ fn G() { } ``` -The use of the name (`x` in the example) of a `ref` binding forms a reference -expression. The address of a `ref` bound name gives the address of the bound -object, so `&x == ptr` above. The reference itself does not have an address, and -unlike a pointer can't be rebound to reference a different object. +The use of the name (`x` in the example) of a `ref` binding forms a durable +reference expression. We ensure that reference expressions formed by way of +reference bindings _do not dangle_. 
A `ref` binding may only bind to a durable +reference expression, not a value expression, initializing expression, or +ephemeral reference expression. The bound durable reference expression must +outlive the `ref` binding. -We ensure that reference expressions formed by way of reference bindings _do not -dangle_. For example, temporaries bound to a reference have their -[lifetime extended, as is done in C++](https://en.cppreference.com/w/cpp/language/reference_initialization#Lifetime_of_a_temporary): - -```carbon -fn ReturnsInt() -> i32; -fn ExtendTempLifetime() -> i32 { - // `ReturnsInt()` returns a temporary whose - // lifetime is extended to match `r`. - let ref r: i32 = ReturnsInt(); - r += 1; - return r + 2; - // Lifetime of `r` and temporary ends here. -} -``` - -FIXME: QUESTION: Are there use cases where we really need temporary lifetime -extension? Was asked for in -[leads issue #5261: "We should add `ref` bindings to Carbon, paralleling reference expressions"](https://github.com/carbon-language/carbon-lang/issues/5261). -Seems like `TakesARefParam(ReturnsATemporary())` is likely to be a bug, and -`let ref ... = ReturnsATemporary();` could be changed to `var` instead. +The address of a `ref` bound name gives the address of the bound object, so +`&x == ptr` above. The reference itself does not have an address, and unlike a +pointer can't be rebound to reference a different object. We remove `addr`, and use instead use `ref` for the `self` parameter when an object is required. Note that the type will change from `Self*` to `Self` in @@ -222,7 +208,7 @@ fn AddTwoToRef(ref x: i32) { // Equivalent to: fn AddTwoToRef(ref x: i32) { x += 1; - let ref y_ptr: i32* = &x; + let y_ptr: i32* = &x; *y_ptr += 1; } ``` @@ -279,12 +265,11 @@ parameter, and so will not be marked with `ref` at the call site. As an _experiment_, we are saying a pointer formed by taking the address of a `ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. 
This means that -while a `ref` parameter could be passed into a function by address, but the -restrictions are intended to also allow a "move-in-move-out" approach (once we -define the move operation), assuming it is not -[marked `bound`](#bound-parameters). The intent here is to leave the door open -to a calling convention using registers and less indirection for small-enough -objects. +while a `ref` parameter could be passed into a function by address, the +restrictions also allow a "move-in-move-out" approach (once we define the move +operation), assuming it is not [marked `bound`](#bound-parameters). The intent +here is to leave the door open to a calling convention using registers and less +indirection for small-enough objects. ### `ref`, `let`, and `var` returns @@ -1063,6 +1048,24 @@ This will be tackled together in a future proposal with other aspects of bindings not represented by the type, such as `var` and compile-time, along with being generic across these aspects of bindings. +### Destructuring assignment + +Having more support for multiple returns from a function opens the question of +how to do different things with the different returns. We may want a syntax for +saying some of the returns are bound to new names, and some are used in +assignments to existing variables. One possibility would be to have some pattern +syntax for re-initializing an existing object, as in: + +```carbon +fn F() -> (-> bool, ->T); +fn G() { + var x: T = ...; + Consume(~x); + let (b: bool, init x) = F(); + // Continue to use `x`... +} +``` + ## Rationale This proposal tries to advance these [Carbon goals](/docs/project/goals.md): @@ -1201,6 +1204,21 @@ FIXME: big concern is effects, but also representing argument lists with tuples Neither Rust nor C++ will extend the lifetime of a temporary passed to a function, which suggests doing so is not needed. 
+### Reference bindings to temporaries + +We preferred to use a `var` binding for temporaries and initializing +expressions, rather than allow a `ref` binding to a temporary and then do +lifetime extension, +[as is done in C++](https://en.cppreference.com/w/cpp/language/reference_initialization#Lifetime_of_a_temporary). + +This means that objects given storage that lasts beyond the declaration are +consistently marked with `var`, and we can diagnose constructs like +`TakesARefParam(ReturnsATemporary())` which are likely to be bugs. + +This choice also reduces the complexity of our temporary lifetime rules. There +have been two Abseil Tips of the Week on this subject, [#101](https://abseil.io/tips/101) and +[#107](https://abseil.io/tips/107), indicating it is a source of trouble in C++. + ### `bound` would change the default return to `let` We considered saying that `bound` would change the default return to use the From 66a6f47321e3b29c1f97a93df7d92219dc26b0fa Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 19 May 2025 21:44:55 +0000 Subject: [PATCH 15/57] Checkpoint progress. 
--- proposals/p5434.md | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 480505f1aec5c..d616d612139f1 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Details](#details) - [Initializing returns](#initializing-returns) - [Compound return forms and patterns](#compound-return-forms-and-patterns) - - [Resolution of #5250](#resolution-of-5250) + - [Nested binding patterns](#nested-binding-patterns) - [`bound` parameters](#bound-parameters) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) @@ -80,12 +80,13 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception Reference bindings have come up multiple times: -- as a better alternative to `addr self: Self*`, -- for use in [lambda captures](/docs/design/lambdas.md), -- to support - [nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250), +- as a better alternative to `addr self: Self*`; +- for use in [lambda captures](/docs/design/lambdas.md); +- to support nested bindings within a destructured `var`, see + [#5250](https://github.com/carbon-language/carbon-lang/issues/5250) and + [proposal #5164](https://github.com/carbon-language/carbon-lang/pull/5164); - for forwarding arguments while preserving - [expression category](/docs/design/README.md#expression-categories), and + [expression category](/docs/design/README.md#expression-categories); and - to support breaking up an expression into pieces without altering the expression category of individual pieces. @@ -505,10 +506,29 @@ This feature is intended to support cases like `enumerate` that will want to return a value for the index but a reference to the element of the sequence being enumerated. 
-#### Resolution of #5250 +### Nested binding patterns -FIXME: -[nested bindings within a destructured `var`](https://github.com/carbon-language/carbon-lang/issues/5250) +Since a `ref` binding may only bind to a durable reference expression, it can't +be used to bind the result of a function returning an initializing expression. +However, if the initializing expression is bound to a `var`, any nested patterns +are reference binding patterns bound to the corresponding subobjects, following +[proposal #5164: "Updates to pattern matching for objects"](https://github.com/carbon-language/carbon-lang/pull/5164). + +For example: + +```carbon +fn F() -> (bool, (C, i32)); +let (b: bool, var (c: C, i: i32)) = F(); +``` + +is equivalent to: + +```carbon +fn F() -> (bool, (C, i32)); +let (b: bool, var v: (C, i32)) = F(); +let ref c: C = v.0; +let ref i: i32 = v.1; +``` ### `bound` parameters @@ -1066,6 +1086,9 @@ fn G() { } ``` +This was discussed in +[the #syntax channel on Discord on 2025-05-19](https://discord.com/channels/655572317891461132/709488742942900284/1374126123595727000). + ## Rationale This proposal tries to advance these [Carbon goals](/docs/project/goals.md): From 8060cf781825f881940f74c8fe3f42ffc416e35f Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 19 May 2025 22:17:07 +0000 Subject: [PATCH 16/57] Checkpoint progress. --- proposals/p5434.md | 94 ++++++++++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 37 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index d616d612139f1..14c41091360e6 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -873,9 +873,9 @@ class SmartPtr(T:! 
type) { ### Use case: indexing interfaces -FIXME - -Before: +[Proposal #2274: "Subscript syntax and semantics"](https://github.com/carbon-language/carbon-lang/pull/2274) +added the interfaces used to support indexing with the subscripting operator +`[`...`]`, see the [indexing design](/docs/design/expressions/indexing.md): ```carbon interface IndexWith(SubscriptType:! type) { @@ -902,18 +902,23 @@ final impl forall } ``` -After: +We change these in the following ways: + +- The `addr self` parameters are changed to `bound ref self`, to allow the + result to reference the `self` object. +- The `Addr` methods are renamed `Ref` and return a reference instead of a + pointer that is automatically dereferenced. ```carbon interface IndexWith(SubscriptType:! type) { let ElementType:! type; fn At[self: Self](subscript: SubscriptType) -> ElementType; - fn Addr[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; + fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; } interface IndirectIndexWith(SubscriptType:! type) { require Self impls IndexWith(SubscriptType); - fn Addr[bound self: Self](subscript: SubscriptType) -> ref ElementType; + fn Ref[bound self: Self](subscript: SubscriptType) -> ref ElementType; } final impl forall @@ -921,21 +926,21 @@ final impl forall T as IndexWith(SubscriptType) { let ElementType:! 
type = T.(IndirectIndexWith(SubscriptType).ElementType); fn At[self: Self](subscript: SubscriptType) -> ElementType { - return self.(IndirectIndexWith(SubscriptType).Addr)(index); + return self.(IndirectIndexWith(SubscriptType).Ref)(index); } - fn Addr[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { - return self.(IndirectIndexWith(SubscriptType).Addr)(index); + fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { + return self.(IndirectIndexWith(SubscriptType).Ref)(index); } } ``` -`lhs[index]` with `lhs` of type `T` and `index` of type `I` will be rewritten to -one of: +With these changes, `lhs[index]` with `lhs` of type `T` and `index` of type `I` +will be rewritten to one of: - Before: `*(lhs.(IndirectIndexWith(I).Addr)(index))` -> After: - `lhs.(IndirectIndexWith(I).Addr)(index)` + `lhs.(IndirectIndexWith(I).Ref)(index)` - Before: `*(lhs.(IndexWith(I).Addr)(index))` -> After: - `lhs.(IndexWith(I).Addr)(index)` + `lhs.(IndexWith(I).Ref)(index)` - Unchanged: `lhs.(IndexWith(I).At)(index)` depending on whether `T` is known to implement `IndirectIndexWith(I)` and @@ -1175,37 +1180,40 @@ safety. ### Remove pointers after adding references -FIXME - -Need to have something rebindable in an assignable object. The viable path -forward without separate pointers and references is to have something rebindable -like pointers but automatically dereferenced like references, which is the -approach Rust takes. +If we removed pointers after adding references, we would need something +rebindable for assignable objects. The viable path forward without separate +pointers and references is to have something rebindable like pointers but +automatically dereferenced like references, which is the approach Rust takes. +See +[this comment on issue #5261](https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2786462775). -One of the features of a reference is what it cannot do, so can't build a -pointer out of references. 
+One of the features of a reference is what it cannot do, and we would have to +remove those restrictions to be able to satisfy the pointer use cases with +references. ### Allow `ref` bindings in the fields of classes -FIXME - -Did not see a reason to put references into objects, so keeping references out -of types. This could change to support lambda reference captures. +A type with reference binding fields would need a lot of restrictions since +reference bindings are not assignable. We did not see enough motivation to put +references into objects, given the complexity that it would introduce, so we are +keeping references out of types for now. This could change to support lambda +reference captures. ### No call-site annotation -FIXME -[2025-05-07](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.sfx9d7ltud5) - -The marking is not about lifetime, but ability to mutate. A `let` may reference -an object in a similar way to a `ref`, restricting operations on the original -object, but we are not going to mark `let`s since those restrictions are -enforced by the compiler. +This question was discussed on +[2025-05-07](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.sfx9d7ltud5). -Swift `inout` parameters are marked at caller with an `&` before the argument -https://docs.swift.org/swift-book/documentation/the-swift-programming-language/functions/#In-Out-Parameters +We decided that the marking is not about lifetime, but ability to mutate. A +`let` may reference an object in a similar way to a `ref`, restricting +operations on the original object, but we are not going to mark `let`s since +those restrictions are enforced by the compiler. We thought the ability to +mutate, though, was something important enough to highlight to readers of the +code, even at the expense of extra work for the writer. 
-And Jordon Rose has published +Swift `inout` parameters are +[marked at the caller with an `&` before the argument](https://docs.swift.org/swift-book/documentation/the-swift-programming-language/functions/#In-Out-Parameters). +Jordan Rose has published [a regret](https://belkadan.com/blog/2021/12/Swift-Regret-inout-Syntax/) that they didn't use `inout` to mark the argument instead of `&`. @@ -1216,11 +1224,23 @@ This is a "try it and see how well it works" sort of decision. ### Top-level `ref` introducer For now, we don't believe `let ref` to be so common as to need a shorter way to -write, unlike what we do for `var. +write, unlike what we do for `var`. ### `ref` as a type qualifier -FIXME: big concern is effects, but also representing argument lists with tuples +The big concern is that any effect that is represented by a type, like +`Optional` or `Result`, will want to compose with reference returns. This could +be done by allowing `ref` to create an object type that could be used as a +parameter to those, as in `Optional(ref T)`, but +[we are trying to avoid going down that path](https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2790421894). +We have [future work](#interaction-with-effects) to tackle this problem +specifically. + +There was also +[a concern that we might need `ref` types to represent argument lists with tuples](https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2790506515), +but tuples already can't represent `var` or compile-time parameters. We have +other plans for this, instead of trying to stretch tuples to encompass these use +cases. ### `bound` causes temporary lifetime extension From 4b1584d6cd79fde56240f0c5776f97c0393fe2ad Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 20 May 2025 00:34:42 +0000 Subject: [PATCH 17/57] Checkpoint progress. 
--- proposals/p5434.md | 152 ++++++++++++--------------------------------- 1 file changed, 39 insertions(+), 113 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 14c41091360e6..ff2b09bb6a95b 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -31,6 +31,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Use case: indexing interfaces](#use-case-indexing-interfaces) - [Use case: class accessors](#use-case-class-accessors) - [Future work](#future-work) + - [Temporary lifetimes](#temporary-lifetimes) - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) - [Interaction with effects](#interaction-with-effects) - [More precise lifetimes](#more-precise-lifetimes) @@ -45,8 +46,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [No call-site annotation](#no-call-site-annotation) - [Top-level `ref` introducer](#top-level-ref-introducer) - [`ref` as a type qualifier](#ref-as-a-type-qualifier) - - [`bound` causes temporary lifetime extension](#bound-causes-temporary-lifetime-extension) - - [Reference bindings to temporaries](#reference-bindings-to-temporaries) - [`bound` would change the default return to `let`](#bound-would-change-the-default-return-to-let) - [Other return conventions](#other-return-conventions) - [`return var` with compound return forms](#return-var-with-compound-return-forms) @@ -156,9 +155,8 @@ fn G() { The use of the name (`x` in the example) of a `ref` binding forms a durable reference expression. We ensure that reference expressions formed by way of reference bindings _do not dangle_. A `ref` binding may only bind to a durable -reference expression, not a value expression, initializing expression, or -ephemeral reference expression. The bound durable reference expression must -outlive the `ref` binding. +reference expression or an expression that can be converted to one. The bound +durable reference expression must outlive the `ref` binding. 
The address of a `ref` bound name gives the address of the bound object, so `&x == ptr` above. The reference itself does not have an address, and unlike a @@ -272,6 +270,17 @@ operation), assuming it is not [marked `bound`](#bound-parameters). The intent here is to leave the door open to a calling convention using registers and less indirection for small-enough objects. +This means that the following code is invalid: + +```carbon +fn F(ref a: i32, ref b: i32) -> bool; + +fn G() -> bool { + var v: i32 = 1; + return F(ref v, ref v); +} +``` + ### `ref`, `let`, and `var` returns The return of a function can optionally be marked `ref`, `let`, or `var`. These @@ -532,8 +541,8 @@ let ref i: i32 = v.1; ### `bound` parameters -It is an error to return a reference to a temporary that won't live once the -function returns, even if it is a parameter marked `bound`. +It is an error to return a reference to a temporary object that won't live once +the function returns, even if it is a parameter marked `bound`. ```carbon fn Invalid1() -> i32* { @@ -647,9 +656,9 @@ impl C2 as I { Like `[[clang::lifetimebound]]` in C++, `bound` does not affect semantics or calling conventions, just what code is legal. This helps avoid mismatches -between typechecking against the signatures in an interface when the impl -functions are different. In particular, a temporary is passed to a `bound` -parameter is an error, instead of having its lifetime extended. +between typechecking against the signatures in an interface when the `impl` +functions are different. Exception: the question of whether `bound` affects the +lifetime of temporaries is [future work](#temporary-lifetimes). Note that all combinations of a `var`/`ref`/`let` return can be bound to a `var`/`ref`/`let` parameter. 
Examples: @@ -661,7 +670,7 @@ fn VarToRef(bound var p: i32*) -> ref i32 { return *p; } fn VarToVar(bound var p: i32*) -> var i32* { return p; } ``` -For full safety, need a bound variable to be immutably borrowed for the duration +For full safety, we need each bound parameter to be immutable for the duration of the lifetime of the returned result. However, the objective for now is only matching `[[clang::lifetimebound]]`, which has the goal of preventing some classes of bugs, not full memory safety. We will reconsider this with the memory @@ -669,64 +678,9 @@ safety design. ### How addresses interact with `ref` -FIXME - -The suggested model is that `ref` bindings mirror reference expressions in that -they refer back to some underlying object. As a consequence, it should be -possible to take the address of a `ref` binding and get the address of that -object. - -However, we expect reference expressions and as consequence `ref` bindings to -work more like Swift `inout` than like a pointer: there may be implicit copies -or moves that occur prior to forming the reference expression, or binding it to -a name. The goal is that it should be possible for some types to implement `ref` -parameters through move-in / move-out semantics. - -When we have a `ref` _binding_ specifically, we expect its address to be -_stable_ for the lifetime of the binding. And there is no valid move-in/move-out -semantic model for _overlapping_ bindings -- those must all reference the same -underlying object, and the address of those must all match in addition to being -stable. But for non-overlapping bindings such as parameters, a move-in/move-out -model should be equally valid from the perspective of the `ref` binding, and the -address within the function might be different from the address in the caller. - -At least in cases where a type permits move-in/move-out, the address of a `ref` -parameter should be implicitly `nocapture` in LLVM's semantic model for example. 
-Whether we go further and restrict `ref` to be LLVM-`nocapture` more broadly is -an open question that can likely also be an area for future work. - -FIXME: also `noalias` see -https://github.com/carbon-language/carbon-lang/issues/5261#issuecomment-2843057728 - -Do we allow shared mutable `ref`s to the same object? How does that interact -with the ability to support move-in-move-out? For example: - -```carbon -// Suppose i32 has move-in-move-out semantics for ref parameters. -fn F(ref a: i32, ref b: i32) -> bool { - // Can this affect the value of b? - a = 1; - // Can this return true? - return &a == &b; -} -fn G() -> bool { - var v: i32; - return F(v, v); -} -``` - -It seems like the restriction we would want on `ref` parameters to support -move-in-move-out in cases like the above, and still have `ref` behave like a -reference, would be something similar to `noalias` / a "no shared references" -rule. It could well be that that's exactly what we want, but I think it's -important that we're explicit about what choice we're making here. - -If we can't make the difference between pass-by-pointer (alias) and -move-in-move-out (borrow) unobservable for `ref`, for example by adding -restrictions that prohibit observing the difference, I think we should consider -having both `ref` and `inout` so that the programmer can state their intent. - -FIXME: +The address of a `ref` binding is `noalias` and either `captures(none)` or +`captures(ret: address, provenance)`, depending on whether the binding is marked +`bound`. - `noalias` means like C `restrict`; you can't observe mutations through aliases; mutation through a restricted pointer is not observable through @@ -736,9 +690,6 @@ FIXME: - `captures(ret: address, provenance)`: is like `captures(none)` but may be referenced by a return. -Will use `bound` marker (FIXME: link bound section) to pick between these last -two options. - The `noalias` semantics are the minimum for the optimization. 
But this condition is hard to check, so safe code will use a stricter criteria. Unsafe code will be required to adhere to the looser `noalias` restrictions, but will not be checked @@ -751,28 +702,9 @@ their requirements can be established. This avoids introducing undefined behavior, which we particularly don't want to do in situations where C++ doesn't. -Outcome going for: nudges us towards function boundaries that don't kind of +The goal of these rules it to nudge us towards function boundaries that don't constructively create aliasing in their API boundary and don't capture pointers -unnecessarily. I think we can actually enforce the nocapture side of this and we -should reject constructive aliasing between arguments. And if that works, we get -the benefit that this will cause programmers to use patterns that don't -introduce aliasing that inhibits optimizations. - -Two ways in which optimizer runs into aliasing problems: data structures on the -heap where we want to disable optimizations that rely on assuming pointers don't -alias, which is largely unavoidable. Second way, is at a function boundary, but -this can often be avoided with a different function API. The goal is to -encourage function APIs that don't use create aliases, or it is checkable at -call sites when it is happening. - -Important property: reference bindings nest lexically. Don't have a reference -binding whose lifetime partially overlaps. Means the inner reference binding -would have to be based on the outer one anyway, which I think makes it okay. If -have a `let` binding to the same object, we need to prevent modifications to -that object, whether that is through a `ref` binding or any other way. This -suggests not allowing a `ref` binding to another function while a `let` binding -is active, an not allowing the same object to be passed as both a `ref` and -`let` to the same function. +unnecessarily. 
These restrictions are experimental, and we should keep track of everything we end up needing to do to work around these restrictions so any reconsideration @@ -1014,6 +946,20 @@ class HasMember { ## Future work +### Temporary lifetimes + +For safety, we need bindings and returns that reference storage to only be used +while that storage remains valid. When the referenced storage is owned by a +temporary, we have a choice to either control the lifetime of the temporary or +diagnose when the lifetime of the temporary is insufficient. Deciding on our +policy is future work. + +Note that in many cases we can explicitly provide storage in a variable instead +of referencing a temporary. For example, using +`var x: ... = ReturnsATemporary();` instead of +`let ref x: ... = ReturnsATemporary();`. This won't apply in all situations, +though, such as temporaries that are reachable transitively through pointers. + ### `ref` bindings in lambdas We have already identified @@ -1242,26 +1188,6 @@ but tuples already can't represent `var` or compile-time parameters. We have other plans for this, instead of trying to stretch tuples to encompass these use cases. -### `bound` causes temporary lifetime extension - -Neither Rust nor C++ will extend the lifetime of a temporary passed to a -function, which suggests doing so is not needed. - -### Reference bindings to temporaries - -We preferred to use a `var` binding for temporaries and initializing -expressions, rather than allow a `ref` binding to a temporary and then do -lifetime extension, -[as is done in C++](https://en.cppreference.com/w/cpp/language/reference_initialization#Lifetime_of_a_temporary). - -This means that objects given storage that lasts beyond the declaration are -consistently marked with `var`, and we can diagnose constructs like -`TakesARefParam(ReturnsATemporary())` which are likely to be bugs. - -It reduces the complexity of our temporary lifetime rules. 
There have been two -Abseil tip of the weeks on this subject, [#101](https://abseil.io/tips/101) and -[#107](https://abseil.io/tips/107), indicating it is a source of trouble in C++. - ### `bound` would change the default return to `let` We considered saying that `bound` would change the default return to use the From c89d46d188f74ee5007537dc2ad204ff3ec07c57 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 20 May 2025 05:44:23 +0000 Subject: [PATCH 18/57] Ready for review --- proposals/p5434.md | 71 ++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 37 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index ff2b09bb6a95b..0990acc24fcc3 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -82,10 +82,16 @@ Reference bindings have come up multiple times: - as a better alternative to `addr self: Self*`; - for use in [lambda captures](/docs/design/lambdas.md); - to support nested bindings within a destructured `var`, see - [#5250](https://github.com/carbon-language/carbon-lang/issues/5250) and + [issue #5250](https://github.com/carbon-language/carbon-lang/issues/5250) + and [proposal #5164](https://github.com/carbon-language/carbon-lang/pull/5164); - for forwarding arguments while preserving - [expression category](/docs/design/README.md#expression-categories); and + [expression category](/docs/design/README.md#expression-categories); +- to add a feature to pattern matching to modify things after they have been + matched; +- to support refactoring code without changing all the uses of a name, a + problem we are already seeing with `self` and `addr self`, and would be a + point of friction in local pattern matching in the future; and - to support breaking up an expression into pieces without altering the expression category of individual pieces. 
@@ -1084,45 +1090,36 @@ and ### No `ref`, only pointers -FIXME - -- [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) - -Reasons to add references: +The rationale to add `ref` instead of staying with pointers was discussed on +[2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc). +In addition to the motivating problems given in +[the "Problem" section](#problem), that discussion included some additional +depth to the reasons to add reference bindings. There is a tension between wanting to have mutating expressions and only having -pointers. Think you need some concept like a reference in order to mutate an -object with an object with an expression. Question is how small a box it is +pointers. You need some concept like a reference in order to mutate an object +with an expression. The question is how small a box the references are restricted to, and where the line is drawn. C has lvalues, which contain references but are restricted to a quite small box. Reference bindings -specifically about keeping a small box around references while still enough -expressivity. Started with a C-ish model, but it fell down when it comes down to -composition. Decomposing an expression into pieces loses the tools the -expression provided to you. Missing piece was reference bindings. - -Saw how much we were leaning on value bindings. Also the asymmetry between -having value binding but not referenced bindings when have value expressions and -reference expressions was creating pressure. Example: accessing members of an -object, had to escape to pointers in that operator. - -Expectation that we would have a goal to use references instead of pointers when -possible. - -- One of the features of pattern matching we want is being able to modify - things after they have been matched. Will be a reference. 
-- Refactoring code without changing all the uses of a name. Already seeing - this problem with `self` and `addr self`, and would be a point of friction - in local pattern matching in the future. -- Expected to help increase the expressiveness of lambda captures. - -Counter: good to see indirections when they are present - -This does fundamentally mean that we now have another kind of "pointer", -potentially adding complexity to any memory-safety story. However, I think this -ship already sailed to some extent with value bindings. Fundamentally, bindings -are allowed to have pointer-like semantics from a lifetime perspective, and so -will need to be considered as a pointer-like thing as we build out lifetime -safety. +specifically are about keeping a small box around references while still adding +enough expressivity to support our use cases. We started with a model +similar to C, but it fell down when it came to composition. Decomposing an +expression into pieces loses the tools the expression provided to you. The +missing tool for that was reference bindings. + +We saw how much we were leaning on value bindings. The asymmetry between having +value bindings but not reference bindings when we have value expressions and +reference expressions was creating pressure. For example, when accessing members +of an object, we had to escape to pointers in that operator. + +One downside of this change is that indirections were previously more visible in the +code. + +Also, this does fundamentally mean that we now have another kind of "pointer", +potentially adding complexity to any memory-safety story. However, this ship +already sailed to some extent with value bindings. Fundamentally, bindings are +allowed to have pointer-like semantics from a lifetime perspective, and so will +need to be considered as a pointer-like thing as we build out lifetime safety. 
### Remove pointers after adding references From 0894f6871d47689535f06c84863670c425470256 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 20 May 2025 05:47:34 +0000 Subject: [PATCH 19/57] Oops, missed a couple of final touches --- proposals/p5434.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 0990acc24fcc3..b64dabeebfac9 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -58,10 +58,9 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - A parameter binding can be marked `ref` instead of `var` or the default. It will bind to reference argument expressions in the caller and produces a reference expression in the callee. - - Unlike pointers, a `ref` binding may not be rebound to a different - object. + - Unlike pointers, a `ref` binding can't be rebound to a different object. - This replaces `addr`, and is not restricted to the `self` parameter. - - A `ref` binding, like a `let` binding, may not be used in fields of + - A `ref` binding, like a `let` binding, can't be used in fields of classes or structs. - When calling functions, arguments to non-`self` `ref` parameters are also marked with `ref`. @@ -730,7 +729,8 @@ This does create a migration concern, raised in [open discussion on 2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc), that the `nocapture` and `noalias` modifiers don't match C++ restrictions, particularly on the `this` parameter that we are going to require migrate to -`ref self`. +`ref self`. We may have to add back in `addr` to allow a different pointer type +for those cases. 
### Part of the expression type system, not object types From acec7d91f6dfeac2cb47d76b89f93efda59c7910 Mon Sep 17 00:00:00 2001 From: josh11b <15258583+josh11b@users.noreply.github.com> Date: Tue, 20 May 2025 20:43:04 -0700 Subject: [PATCH 20/57] Apply suggestions from code review Co-authored-by: Geoff Romer --- proposals/p5434.md | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index b64dabeebfac9..6c73cb656d89d 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -199,8 +199,7 @@ class C { Potentially abbreviating the syntax further (to allow `ref self` as a short form of `ref self: Self`) is left as future work. -The `ref` modifier is allowed on any `:` parameter pattern (as an alternative to -`var`) or any `let` binding in a function body. +The `ref` modifier is allowed on any `:` binding that's not inside a `var` pattern. ```carbon fn AddTwoToRef(ref x: i32) { @@ -237,7 +236,7 @@ class Invalid { } ``` -When calling functions, arguments to non-`self` `ref` parameters are also marked +In a function argument list, arguments to non-`self` `ref` parameters are also marked with `ref`. Continuing the example: ```carbon @@ -258,11 +257,11 @@ Operators will mostly not take `ref` parameters, with these exceptions: - [the address-of operator](/docs/design/expressions/pointer_operators.md) `&`; -- [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; +- the first operand of [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; - [the member access operator](/docs/design/expressions/member_access.md) `.`; and -- [the assignment operators](/docs/design/assignment.md) such as `=`, `+=`, - and `++`. +- the left-hand operand of [assignment operators](/docs/design/assignment.md) such as `=` and `+=`. +- [the `++` and `--` operators](/docs/design/assignment.md). 
In the cases that they do, observe that the `ref` parameter will be the `self` parameter, and so will not be marked with `ref` at the call site. @@ -327,7 +326,7 @@ fn ReturnVar() -> var i32 { var j: i32 = ReturnVar(); ``` -- A call to a function declared `-> ref T` is a reference expression. The +- A call to a function declared `-> ref T` is a durable reference expression. The function will return the address of a `T` object. - A call to a function declared `-> let T` is a value expression. The function will return the value representation of `T`. Since values have no address, @@ -361,7 +360,7 @@ fn BraceReturn() If the return of a function may reference the storage of one or more parameters to the function, those parameters must be marked `bound`. This allows the -compiler to diagnose if the function's return is used after the lifetime of and +compiler to diagnose if the function's return is used after the lifetime of the `bound` parameter ends. The semantics of `bound` are intended to match the [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8). @@ -410,7 +409,7 @@ limitations. ### Initializing returns The `-> var T` return form guarantees that `returned var` is the same variable -in the caller and callee. In contrast, the `-> T` return form that also produces +in the caller and callee. In contrast, the `-> T` return form also produces an initializing expression, but may introduce a copy depending on the initializing representation. For example: @@ -485,8 +484,8 @@ struct with one token of lookahead. Only types are allowed after a `-> let`, // `(bool, f32, C, i32)`. fn TupleReturn(...) -> (bool, f32, C, i32); -// ❌ Invalid, every element must start with -// `->` to be a form. +// ❌ Invalid, every element of a compound return form must +// start with `->`. fn Invalid1(...) 
-> (bool, ->let f32); // ❌ Invalid, can only specify return category @@ -627,8 +626,8 @@ fn Conditional(b: bool, bound ref r: C, bound p: C*) ``` The parameters of functions defined in an interface may also be marked as -`bound`. The `impl` of that interface for a type can only have `bound` in a -subset of the positions present in the interface. +`bound`. The `impl` of that interface for a type can omit occurrences of +`bound` from the interface, but cannot add new ones. ```carbon interface I { @@ -690,8 +689,8 @@ The address of a `ref` binding is `noalias` and either `captures(none)` or - `noalias` means like C `restrict`; you can't observe mutations through aliases; mutation through a restricted pointer is not observable through another pointer -- `captures(none)` means no transitive escape, can pass a nocapture pointer to - another nocapture function, can't store to memory or return +- `captures(none)` means there is no transitive escape: you can pass a nocapture pointer to + another nocapture function, but you can't store to memory or return - `captures(ret: address, provenance)`: is like `captures(none)` but may be referenced by a return. 
From 626c3ae270531eb63b28e147ec36ad4a056673f7 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 04:48:19 +0000 Subject: [PATCH 21/57] Updates inspired by review --- proposals/p5434.md | 69 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 14 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 6c73cb656d89d..2d661c2d8ae9b 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -29,6 +29,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Interaction with `returned var`](#interaction-with-returned-var) - [Use case: `Deref` interface](#use-case-deref-interface) - [Use case: indexing interfaces](#use-case-indexing-interfaces) + - [Use case: member binding interfaces](#use-case-member-binding-interfaces) - [Use case: class accessors](#use-case-class-accessors) - [Future work](#future-work) - [Temporary lifetimes](#temporary-lifetimes) @@ -199,7 +200,8 @@ class C { Potentially abbreviating the syntax further (to allow `ref self` as a short form of `ref self: Self`) is left as future work. -The `ref` modifier is allowed on any `:` binding that's not inside a `var` pattern. +The `ref` modifier is allowed on any `:` binding that's not inside a `var` +pattern. ```carbon fn AddTwoToRef(ref x: i32) { @@ -236,8 +238,8 @@ class Invalid { } ``` -In a function argument list, arguments to non-`self` `ref` parameters are also marked -with `ref`. Continuing the example: +In a function argument list, arguments to non-`self` `ref` parameters are also +marked with `ref`. Continuing the example: ```carbon var z: i32 = 3; @@ -250,6 +252,12 @@ c.NewMethod(); Assert(c.Get() == 7); ``` +> **Note:** It is important that this restriction is syntactic, not just +> semantic, because it means that `ref` is never the first token of a full +> expression, and so we know without lookahead that a `ref` in a pattern context +> must be the start of a binding pattern, not the start of an expression +> pattern. 
+ Normally an argument to a non-`ref` parameter should not be marked `ref`, but it is allowed in a generic context where the parameter may sometimes be `ref`. @@ -257,10 +265,12 @@ Operators will mostly not take `ref` parameters, with these exceptions: - [the address-of operator](/docs/design/expressions/pointer_operators.md) `&`; -- the first operand of [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; +- the first operand of + [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; - [the member access operator](/docs/design/expressions/member_access.md) `.`; and -- the left-hand operand of [assignment operators](/docs/design/assignment.md) such as `=` and `+=`. +- the left-hand operand of [assignment operators](/docs/design/assignment.md) + such as `=` and `+=`. - [the `++` and `--` operators](/docs/design/assignment.md). In the cases that they do, observe that the `ref` parameter will be the `self` @@ -326,8 +336,8 @@ fn ReturnVar() -> var i32 { var j: i32 = ReturnVar(); ``` -- A call to a function declared `-> ref T` is a durable reference expression. The - function will return the address of a `T` object. +- A call to a function declared `-> ref T` is a durable reference expression. + The function will return the address of a `T` object. - A call to a function declared `-> let T` is a value expression. The function will return the value representation of `T`. Since values have no address, the value representation may be returned in registers. @@ -409,9 +419,9 @@ limitations. ### Initializing returns The `-> var T` return form guarantees that `returned var` is the same variable -in the caller and callee. In contrast, the `-> T` return form also produces -an initializing expression, but may introduce a copy depending on the -initializing representation. For example: +in the caller and callee. 
In contrast, the `-> T` return form also produces an +initializing expression, but may introduce a copy depending on the initializing +representation. For example: ```carbon fn F() -> var T { @@ -626,8 +636,8 @@ fn Conditional(b: bool, bound ref r: C, bound p: C*) ``` The parameters of functions defined in an interface may also be marked as -`bound`. The `impl` of that interface for a type can omit occurrences of -`bound` from the interface, but cannot add new ones. +`bound`. The `impl` of that interface for a type can omit occurrences of `bound` +from the interface, but cannot add new ones. ```carbon interface I { @@ -689,8 +699,9 @@ The address of a `ref` binding is `noalias` and either `captures(none)` or - `noalias` means like C `restrict`; you can't observe mutations through aliases; mutation through a restricted pointer is not observable through another pointer -- `captures(none)` means there is no transitive escape: you can pass a nocapture pointer to - another nocapture function, but you can't store to memory or return +- `captures(none)` means there is no transitive escape: you can pass a + nocapture pointer to another nocapture function, but you can't store to + memory or return - `captures(ret: address, provenance)`: is like `captures(none)` but may be referenced by a return. @@ -884,6 +895,36 @@ depending on whether `T` is known to implement `IndirectIndexWith(I)` and whether `lhs` is a [durable reference expression](/docs/design/values.md#durable-reference-expressions). +### Use case: member binding interfaces + +The member binding interface used for reference expressions from +[proposal #3720](https://github.com/carbon-language/carbon-lang/pull/3720) can +now be changed to use references instead of pointers. + +Before: + +```carbon +// For a reference expression `x` with type `T` +// and an expression `y` of type `U`, `x.(y)` is +// `*y.((U as BindToRef(T)).Op)(&x)` +interface BindToRef(T:! 
type) { + extend impl as Bind(T); + fn Op[self: Self](p: T*) -> Result*; +} +``` + +After: + +```carbon +// For a reference expression `x` with type `T` +// and an expression `y` of type `U`, `x.(y)` is +// `y.((U as BindToRef(T)).Op)(x)` +interface BindToRef(T:! type) { + extend impl as Bind(T); + fn Op[self: Self](bound ref p: T) -> ref Result; +} +``` + ### Use case: class accessors A `ref` return can be used to expose the state of an object in a way that can be From 319f5ebbe3bc77bdee6b5488ad38b18476795d0b Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 04:54:53 +0000 Subject: [PATCH 22/57] Fix incorrect statement --- proposals/p5434.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 2d661c2d8ae9b..e7666e2ee95a9 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -273,8 +273,10 @@ Operators will mostly not take `ref` parameters, with these exceptions: such as `=` and `+=`. - [the `++` and `--` operators](/docs/design/assignment.md). -In the cases that they do, observe that the `ref` parameter will be the `self` -parameter, and so will not be marked with `ref` at the call site. +Even in these cases, the arguments will not be marked with `ref` at the call +site. (Generally the `ref` parameter is the `self` parameter, and so wouldn't be +marked. The exception is `BindToRef`, but we don't want to mark its argument +with `ref`.) As an _experiment_, we are saying a pointer formed by taking the address of a `ref` bound name is LLVM-`captures(none)` and LLVM-`noalias`. 
This means that From 28d8cc2aa5e96f8b859362483f1c66a48c3097ad Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 18:54:34 +0000 Subject: [PATCH 23/57] Changes in response to review --- proposals/p5434.md | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index e7666e2ee95a9..712f71f7e4912 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -347,9 +347,15 @@ var j: i32 = ReturnVar(); caller provides the address of storage to initialize with an object of type `T`, that the caller owns upon return. The object will never be returned in registers. -- A call to a function declared `-> T` is unchanged. It is an initializing - expression. It will either match `-> var T` or return by copy, depending on - the initializing representation of `T`. +- The behavior of a call to a function declared `-> T` is unchanged. It is an + initializing expression. It will either match `-> var T` or return by copy, + depending on the initializing representation of `T`. +- The behavior of `auto` as the return type is unchanged, but now supports an + optional `ref`, `let` or `var` between the `->` and `auto`. `-> auto` and + `-> var auto` return an initializing expression, `-> let auto` returns a + value expression, and `-> ref auto` returns a durable reference expression. +- Using `=>` to specify a return continues to return an initializing + expression, as before. A function may have multiple returns, each with their own marker, by using a paren or brace compound return form. @@ -555,6 +561,8 @@ let ref c: C = v.0; let ref i: i32 = v.1; ``` +Note that `ref` is disallowed inside `var` since that would be redundant. 
### `bound` parameters It is an error to return a reference to a temporary object that won't live once @@ -621,8 +629,9 @@ class DerefPointerMember { } ``` -The `bound` parameters must include everything referenced by the return, but -need not be referenced, particularly not on every code path. +Otherwise, `bound` parameters and global variables are the sources of storage +that can be referenced by a return, but need not be referenced, particularly not +on every code path. ```carbon // Result references `r` if `b` is true, and `p` @@ -707,11 +716,11 @@ The address of a `ref` binding is `noalias` and either `captures(none)` or - `captures(ret: address, provenance)`: is like `captures(none)` but may be referenced by a return. -The `noalias` semantics are the minimum for the optimization. But this condition -is hard to check, so safe code will use a stricter criteria. Unsafe code will be -required to adhere to the looser `noalias` restrictions, but will not be checked -(except possibly by a sanitizer at runtime). The details here will be tackled as -part of the memory safety design. +The `noalias` semantics are the minimum for the "move-in-move-out" optimization. +But this condition is hard to check, so safe code will use a stricter criterion. +Unsafe code will be required to adhere to the looser `noalias` restrictions, but +will not be checked (except possibly by a sanitizer at runtime). The details +here will be tackled as part of the memory safety design. 
Optimizations will only be performed based on information that is enforced or checked by the compiler, so these attributes won't be passed to LLVM unless From 3570dc030bb7b7e17ee4d8de5f5eaf46bead05dd Mon Sep 17 00:00:00 2001 From: josh11b <15258583+josh11b@users.noreply.github.com> Date: Wed, 21 May 2025 11:55:50 -0700 Subject: [PATCH 24/57] Update proposals/p5434.md Co-authored-by: Geoff Romer --- proposals/p5434.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 712f71f7e4912..42860e60ac9f8 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -437,10 +437,7 @@ fn F() -> var T { return var; } -// Changing this to fn G() -> var T has a semantic effect -// if T has a copy initializing representation (and this -// passes safety checks). -fn G() -> T { +fn G() -> var T { return F(); } @@ -449,7 +446,9 @@ fn H() -> var T { } var x: T = H(); -// Is x.p == &x at this point? +// x.p == &x at this point, but might not be the case if +// the signature of `G` were `fn G() -> T` (depending +// on the initializing representation of T). ``` ### Compound return forms and patterns From 3f654fc48e79276951d7fe7ae643aaa1de7e6291 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 19:06:53 +0000 Subject: [PATCH 25/57] Move indexing changes to the relevant design doc --- docs/design/expressions/indexing.md | 18 ++++---- proposals/p5434.md | 68 ++--------------------------- 2 files changed, 12 insertions(+), 74 deletions(-) diff --git a/docs/design/expressions/indexing.md b/docs/design/expressions/indexing.md index c0327ecceed5d..6e20bc3f5c9c1 100644 --- a/docs/design/expressions/indexing.md +++ b/docs/design/expressions/indexing.md @@ -65,12 +65,12 @@ Its semantics are defined in terms of the following interfaces: interface IndexWith(SubscriptType:! type) { let ElementType:! 
type; fn At[self: Self](subscript: SubscriptType) -> ElementType; - fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType*; + fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; } interface IndirectIndexWith(SubscriptType:! type) { require Self impls IndexWith(SubscriptType); - fn Addr[self: Self](subscript: SubscriptType) -> ElementType*; + fn Ref[bound self: Self](subscript: SubscriptType) -> ref ElementType; } ``` @@ -79,11 +79,11 @@ rewritten based on the expression category of _lhs_ and whether `T` is known to implement `IndirectIndexWith(I)`: - If `T` implements `IndirectIndexWith(I)`, the expression is rewritten to - "`*((` _lhs_ `).(IndirectIndexWith(I).Addr)(` _index_ `))`". + "`(` _lhs_ `).(IndirectIndexWith(I).Ref)(` _index_ `)`". - Otherwise, if _lhs_ is a [_durable reference expression_](/docs/design/values.md#durable-reference-expressions), - the expression is rewritten to "`*((` _lhs_ `).(IndexWith(I).Addr)(` _index_ - `))`". + the expression is rewritten to "`(` _lhs_ `).(IndexWith(I).Ref)(` _index_ + `)`". - Otherwise, the expression is rewritten to "`(` _lhs_ `).(IndexWith(I).At)(` _index_ `)`". @@ -93,12 +93,12 @@ implement `IndirectIndexWith(I)`: final impl forall [SubscriptType:! type, T:! IndirectIndexWith(SubscriptType)] T as IndexWith(SubscriptType) { - let ElementType:! type = T.(IndirectIndexWith(SubscriptType)).ElementType; + let ElementType:! 
type = T.(IndirectIndexWith(SubscriptType).ElementType); fn At[self: Self](subscript: SubscriptType) -> ElementType { - return *(self.(IndirectIndexWith(SubscriptType).Addr)(index)); + return self.(IndirectIndexWith(SubscriptType).Ref)(index); } - fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType* { - return self->(IndirectIndexWith(SubscriptType).Addr)(index); + fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { + return self.(IndirectIndexWith(SubscriptType).Ref)(index); } } ``` diff --git a/proposals/p5434.md b/proposals/p5434.md index 42860e60ac9f8..0edab8ae1be6b 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -833,77 +833,15 @@ class SmartPtr(T:! type) { [Proposal #2274: "Subscript syntax and semantics"](https://github.com/carbon-language/carbon-lang/pull/2274) added the interfaces used to support indexing with the subscripting operator -`[`...`]`, see the [indexing design](/docs/design/expressions/indexing.md): - -```carbon -interface IndexWith(SubscriptType:! type) { - let ElementType:! type; - fn At[self: Self](subscript: SubscriptType) -> ElementType; - fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType*; -} - -interface IndirectIndexWith(SubscriptType:! type) { - require Self impls IndexWith(SubscriptType); - fn Addr[self: Self](subscript: SubscriptType) -> ElementType*; -} - -final impl forall - [SubscriptType:! type, T:! IndirectIndexWith(SubscriptType)] - T as IndexWith(SubscriptType) { - let ElementType:! type = T.(IndirectIndexWith(SubscriptType).ElementType); - fn At[self: Self](subscript: SubscriptType) -> ElementType { - return *(self.(IndirectIndexWith(SubscriptType).Addr)(index)); - } - fn Addr[addr self: Self*](subscript: SubscriptType) -> ElementType* { - return self->(IndirectIndexWith(SubscriptType).Addr)(index); - } -} -``` - -We change these in the following ways: +`[`...`]`. 
We change these in the following ways: - The `addr self` parameters are changed to `bound ref self`, to allow the result to reference the `self` object. - The `Addr` methods are renamed `Ref` and return a reference instead of a pointer that is automatically dereferenced. -```carbon -interface IndexWith(SubscriptType:! type) { - let ElementType:! type; - fn At[self: Self](subscript: SubscriptType) -> ElementType; - fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; -} - -interface IndirectIndexWith(SubscriptType:! type) { - require Self impls IndexWith(SubscriptType); - fn Ref[bound self: Self](subscript: SubscriptType) -> ref ElementType; -} - -final impl forall - [SubscriptType:! type, T:! IndirectIndexWith(SubscriptType)] - T as IndexWith(SubscriptType) { - let ElementType:! type = T.(IndirectIndexWith(SubscriptType).ElementType); - fn At[self: Self](subscript: SubscriptType) -> ElementType { - return self.(IndirectIndexWith(SubscriptType).Ref)(index); - } - fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { - return self.(IndirectIndexWith(SubscriptType).Ref)(index); - } -} -``` - -With these changes, `lhs[index]` with `lhs` of type `T` and `index` of type `I` -will be rewritten to one of: - -- Before: `*(lhs.(IndirectIndexWith(I).Addr)(index))` -> After: - `lhs.(IndirectIndexWith(I).Ref)(index)` -- Before: `*(lhs.(IndexWith(I).Addr)(index))` -> After: - `lhs.(IndexWith(I).Ref)(index)` -- Unchanged: `lhs.(IndexWith(I).At)(index)` - -depending on whether `T` is known to implement `IndirectIndexWith(I)` and -whether `lhs` is a -[durable reference expression](/docs/design/values.md#durable-reference-expressions). +This PR with this proposal makes those changes to the +[indexing design](/docs/design/expressions/indexing.md). 
### Use case: member binding interfaces From 2a76c74e69a3cc0cd5c302259ab892c33b2b228a Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 21:09:37 +0000 Subject: [PATCH 26/57] Incorporate more feedback --- proposals/p5434.md | 208 ++++++++++++++++++++------------------------- 1 file changed, 91 insertions(+), 117 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 0edab8ae1be6b..350b44fc6cd47 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -19,7 +19,6 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [`ref` bindings](#ref-bindings) - [`ref`, `let`, and `var` returns](#ref-let-and-var-returns) - [Details](#details) - - [Initializing returns](#initializing-returns) - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [Nested binding patterns](#nested-binding-patterns) - [`bound` parameters](#bound-parameters) @@ -51,6 +50,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Other return conventions](#other-return-conventions) - [`return var` with compound return forms](#return-var-with-compound-return-forms) - [Other syntax for compound return forms](#other-syntax-for-compound-return-forms) + - [`ref` parameters allow aliasing](#ref-parameters-allow-aliasing) @@ -299,9 +299,9 @@ fn G() -> bool { ### `ref`, `let`, and `var` returns -The return of a function can optionally be marked `ref`, `let`, or `var`. These -control the category of the call expression invoking the function, and how the -return expression is returned. +The return of a function can optionally be marked `ref` or `let`. These control +the category of the call expression invoking the function, and how the return +expression is returned. ```carbon var global: i32 = 2; @@ -331,11 +331,11 @@ fn ReturnLet() -> let i32 { // ReturnLet() is a value expression. let l: i32 = ReturnLet(); -fn ReturnVar() -> var i32 { +fn ReturnDefault() -> i32 { return 2; } -// ReturnVar() is an initializing expression. 
-var j: i32 = ReturnVar(); +// ReturnDefault() is an initializing expression. +var j: i32 = ReturnDefault(); ``` - A call to a function declared `-> ref T` is a durable reference expression. @@ -343,17 +343,13 @@ var j: i32 = ReturnVar(); - A call to a function declared `-> let T` is a value expression. The function will return the value representation of `T`. Since values have no address, the value representation may be returned in registers. -- A call to a function declared `-> var T` is an initializing expression. The - caller provides the address of storage to initialize with an object of type - `T`, that the caller owns upon return. The object will never be returned in - registers. - The behavior of a call to a function declared `-> T` is unchanged. It is an - initializing expression. It will either match `-> var T` or return by copy, - depending on the initializing representation of `T`. + initializing expression, returning in place or by copy depending on the + initializing representation of `T`. - The behavior of `auto` as the return type is unchanged, but now supports an - optional `ref`, `let` or `var` between the `->` and `auto`. `-> auto` and - `-> var auto` return an initializing expression, `-> let auto` returns a - value expression, and `-> ref auto` returns a durable reference expression. + optional `ref` or `let` between the `->` and `auto`. `-> auto` continues to + return an initializing expression, `-> let auto` returns a value expression, + and `-> ref auto` returns a durable reference expression. - Using `=>` to specify a return continues to return an initializing expression, as before. @@ -362,17 +358,17 @@ paren or brace compound return form. 
```carbon fn ParenReturn() - -> (-> bool, ->let f32, ->var C, ->ref i32) { - return (true, 1.0, {.x = 3}, global); + -> (->let bool, ->ref i32, -> C) { + return (true, global, {.x = 3}); } fn BraceReturn() - -> {-> .a: bool, - ->let .b: f32, - ->var .c: C, - ->ref .d: i32} { - return {.a = true, .b = 1.0, - .c = {.x = 3}, .d = global}; + -> {->let .a: bool, + ->ref .b: i32, + -> .c: C} { + return {.a = true, + .b = global, + .c = {.x = 3}}; } ``` @@ -424,33 +420,6 @@ possible. Their benefits are related to their limitations, so to get those benefits we should use them when a use is restricted enough to be within those limitations. -### Initializing returns - -The `-> var T` return form guarantees that `returned var` is the same variable -in the caller and callee. In contrast, the `-> T` return form also produces an -initializing expression, but may introduce a copy depending on the initializing -representation. For example: - -```carbon -fn F() -> var T { - returned var v: T = {.p = &v}; - return var; -} - -fn G() -> var T { - return F(); -} - -fn H() -> var T { - return G(); -} - -var x: T = H(); -// x.p == &x at this point, but might not be the case if -// the signature of `G` were `fn G() -> T` (depending -// on the initializing representation of T). -``` - ### Compound return forms and patterns Mirroring the [paren](/docs/design/pattern_matching.md#tuple-patterns) and @@ -461,40 +430,36 @@ support paren and brace return forms. Every element of these forms starts with ```carbon // Paren return form fn ParenReturn(...) - -> (-> bool, ->let f32, ->var C, ->ref i32); + -> (->let bool, ->ref i32, -> C); -let (var a: bool, b: f32, var c: C, ref d: i32) +let (a: bool, ref b: i32, var c: C) = ParenReturn(...); fn BraceReturn(...) 
- -> {-> .a: bool, - ->let .b: f32, - ->var .c: C, - ->ref .d: i32}; - -// Binds to the names `x`, `y`, `z`, `w`: -let {.a = var x: bool, - .b = y: f32, - .c = var z: C, - .d = ref w: i32} = BraceReturn(...); - -// Binds to the names `a`, `b`, `c`, `d`: -let {var a: bool, - b: f32, - var c: C, - ref d: i32} = BraceReturn(...); + -> {->let .a: bool, + ->ref .b: i32, + -> .c: C}; + +// Binds to the names `x`, `y`, `z`: +let {.a = x: bool, + .b = ref y: i32, + .c = var z: C} = BraceReturn(...); + +// Binds to the names `a`, `b`, `c`: +let {a: bool, + ref b: i32, + var c: C} = BraceReturn(...); // Above two can be mixed, binding to -// names `a`, `b`, `z`, `w`. -let {var a: bool, - b: f32, - .c = var z: C, - .d = ref w: i32} = BraceReturn(...); +// names `a`, `y`, `z`. +let {a: bool, + .b = ref y: i32 + .c = var z: C} = BraceReturn(...); ``` Note that we can distinguish between returning a compound form and a tuple or -struct with one token of lookahead. Only types are allowed after a `-> let`, -`-> var` or `-> ref`, not a compound return form. Examples: +struct with one token of lookahead. Only types are allowed after a `-> let` or +`-> ref`, not a compound return form. Examples: ```carbon // Returns a tuple of type @@ -507,14 +472,14 @@ fn Invalid1(...) -> (bool, ->let f32); // ❌ Invalid, can only specify return category // on innermost `->`. -fn Invalid2(...) -> var (-> bool, -> f32); +fn Invalid2(...) -> let (-> bool, -> f32); ``` To nest, use the default `->` in the outer form, as in: ```carbon fn BracesInParens(...) - -> (->{-> .a: bool, ->let .b: f32}, ->var C, -ref i32); + -> (->{-> .a: bool, ->let .b: f32}, -> C, ->ref i32); let ({.a = var x: bool, .b = let y: f32}, var c: C, ref d: i32) = BracesInParens(); @@ -524,7 +489,7 @@ let ({var a: bool, let b: f32}, fn ParensInBraces(...) 
-> {-> .a: bool, - -> .b: (->let f32, ->var C), + -> .b: (->let f32, -> C), ->ref .c: i32}; let {a: bool, @@ -684,14 +649,14 @@ between typechecking against the signatures in an interface when the `impl` functions are different. Exception: the question of whether `bound` affects the lifetime of temporaries is [future work](#temporary-lifetimes). -Note that all combinations of a `var`/`ref`/`let` return can be bound to a -`var`/`ref`/`let` parameter. Examples: +Note that all combinations of a `lef`/`ref`/default return can be bound to a +`let`/`ref`/`var` parameter. Examples: ```carbon fn RefToLet(bound ref x: C) -> let D { return x.d; } fn LetToRef(bound y: C) -> ref D { return *y.ptr; } fn VarToRef(bound var p: i32*) -> ref i32 { return *p; } -fn VarToVar(bound var p: i32*) -> var i32* { return p; } +fn VarToDefault(bound var p: i32*) -> i32* { return p; } ``` For full safety, we need each bound parameter to be immutable for the duration @@ -770,22 +735,16 @@ handle `ref`. ### Interaction with `returned var` The rule is: `returned var` may only be used when there is a single component to -the return form, and it is either `->var` or default `->`. +the return form, and it is default `->`. ```carbon -fn F(...) -> var V { +fn F(...) -> V { returned var v: V = ...; // ... return var; } -fn F(...) -> T { - returned var ret: T = ...; - // ... - return var; -} - -fn F(...) -> {->var .a : T} { +fn F(...) -> {-> .a : T} { returned var ret: T = ...; // ... return var; @@ -840,7 +799,8 @@ added the interfaces used to support indexing with the subscripting operator - The `Addr` methods are renamed `Ref` and return a reference instead of a pointer that is automatically dereferenced. -This PR with this proposal makes those changes to the +[This proposal's PR](https://github.com/carbon-language/carbon-lang/pull/5434) +makes those changes to the [indexing design](/docs/design/expressions/indexing.md). ### Use case: member binding interfaces @@ -1200,29 +1160,41 @@ work. 
### Other return conventions -We also considered other conventions for returning from functions, on +We also considered other conventions for returning from functions, in +[a comment on #5434](https://github.com/carbon-language/carbon-lang/pull/5434/files#r2099145225), +on [2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5) -and +and on [2025-05-12](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.1mjh6unumnwu), most notably: -- **var without storage**: callee returns pointer to the storage of a +- **in place**: This convention was like `->`, but always using the "in place" + convention where the caller allocates storage and provides the callee with + its address, the callee initializes the storage at that address, and the + caller is responsible for destroying after the return. +- **var without storage**: The callee returns a pointer to the storage of a subobject of a `bound var` parameter, that the caller is then responsible for destroying. A call to this function is a reference expression, but with additional responsibility to destroy. -- **hybrid**: if copy value representation or trivial destructive move then - return the object representation directly; otherwise caller passes a pointer - and callee initializes it. +- **hybrid**: If the type has a copy value representation or trivial + destructive move then return the object representation directly; otherwise + caller passes a pointer and callee initializes it. There were also some variations on what the conditions should be for returning in registers using the default return convention. -We considered "var without storage" the longest, but the fact that it couldn't +We seriously considered "var without storage", but the fact that it couldn't reliably be used to initialize a variable, particularly in the middle of an object, -meant it did not seem valuable enough to include. 
It seemed more valuable to -support the current `-> var`. That return form allows you to guarantee knowing -the address of the object being constructed, and was a good match for -`returned var`. +meant it did not seem valuable enough to include. + +It seemed more valuable to support the "in place" return convention. That return +form allows you to guarantee knowing the address of the object being +constructed, and was a good match for `returned var`. However, we realized that +`var` declarations shouldn't always be associated with in-memory storage, in +particular for types that may be trivially moved. For example, a `var` parameter +with the C++ type `std::unique_ptr` should be passed in registers. A function +returning a `std::unique_ptr` in place would not be as efficient as returning +it by moving it into registers. ### `return var` with compound return forms @@ -1231,7 +1203,7 @@ We considered various syntax options on but none of them seemed good enough to justify inclusion at this time: ```carbon -fn F(...) -> (->ref R, ->let L, ->var V) { +fn F(...) -> (->ref R, ->let L, -> V) { // No longer a `var` being returned. Ideally these // shouldn't have to be initialized together. returned ??? (ref r: R, let l: L, var v: V) = ...? @@ -1241,20 +1213,20 @@ fn F(...) -> (->ref R, ->let L, ->var V) { // We could restrict to one `var` return component, // but this is a lot of machinery for a small increase // in expressiveness and applicability. -fn F(...) -> (->ref R, ->let L, ->var V) { +fn F(...) -> (->ref R, ->let L, -> V) { returned var v: V = ...; let l: L = ...; return (*r, l, var); } -fn F(...) -> (->ref R, ->let L, ->var V) { +fn F(...) -> (->ref R, ->let L, -> V) { // These don't have the right category, and ideally // shouldn't have to be initialized together. returned var ret: (R, L, V) = ...? return var; } -fn F(...) -> (->ref R, ->let L, ->var V) { +fn F(...) 
-> (->ref R, ->let L, -> V) { returned var (_, _, var v: V) = ; } ``` @@ -1262,9 +1234,9 @@ fn F(...) -> (->ref R, ->let L, ->var V) { There was another approach we considered for `returned var` originally: ```carbon -fn F(...) -> (->ref R, ->let L, ->var v1: V1, ->var v2: V2) { +fn F(...) -> (->ref R, ->let L, -> v1: V1, -> v2: V2) { // ... - // Must use the same names for `var` parameters + // Must use the same names for `->` returns with bound names. return (r, l, v1, v2); } ``` @@ -1278,16 +1250,10 @@ But this had downsides that still apply: Our current approach handles our main use case for `returned var`: factory functions. -We also considered only allowing `returned var` with `-> var` returns, not -default returns. This would avoid bugs from assuming the address of the -`returned var` matches the `var` in the caller. More important, though, to -support `returned var` in generic use cases, since it avoids unnecessary -requirements on the type. - -We could support an "only `var`s" approach in the future if we want: +We could support an "only `->`s" approach in the future if we want: ```carbon -fn F(...) -> (->var V1, ->var V2, ->var V3) { +fn F(...) -> (-> V1, -> V2, -> V3) { returned var (v1: V1, v2: V2, v3: V3) = ...; // ... return var; @@ -1317,3 +1283,11 @@ We also considered an approach where compound return forms would start with `->?`, but this raised concerns about what the meaning of that syntax would be and whether we want to expose users to that in cases we might be able to avoid it. + +### `ref` parameters allow aliasing + +If requiring `ref` parameters ends up being to restrictive, we could instead +have the "move-in-move-out" optimization be done only when the compiler can +prove it safe. One strategy would be to generate an alternate version of the +function that is only used in the cases where the `noalias` conditions can be +shown to hold statically. 
From 2915e392332036749f9358694997e52860dd9e0e Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 21:09:56 +0000 Subject: [PATCH 27/57] typo --- proposals/p5434.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 350b44fc6cd47..81011e957ed5b 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1286,7 +1286,7 @@ it. ### `ref` parameters allow aliasing -If requiring `ref` parameters ends up being to restrictive, we could instead +If requiring `ref` parameters ends up being too restrictive, we could instead have the "move-in-move-out" optimization be done only when the compiler can prove it safe. One strategy would be to generate an alternate version of the function that is only used in the cases where the `noalias` conditions can be From cf7d94e5b60f9ed0301d0936372516a88bf448f1 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 22:07:47 +0000 Subject: [PATCH 28/57] Implement suggestion --- proposals/p5434.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 81011e957ed5b..20b1d41231d79 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -339,7 +339,8 @@ var j: i32 = ReturnDefault(); ``` - A call to a function declared `-> ref T` is a durable reference expression. - The function will return the address of a `T` object. + The generated code for that function will return the address of a `T` + object. - A call to a function declared `-> let T` is a value expression. The function will return the value representation of `T`. Since values have no address, the value representation may be returned in registers. 
From 4075aed82caf2e7a75aea142beedda309e509ce4 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 21 May 2025 23:50:01 +0000 Subject: [PATCH 29/57] Add small section on completeness --- proposals/p5434.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proposals/p5434.md b/proposals/p5434.md index 20b1d41231d79..0382678b3e912 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -30,6 +30,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Use case: indexing interfaces](#use-case-indexing-interfaces) - [Use case: member binding interfaces](#use-case-member-binding-interfaces) - [Use case: class accessors](#use-case-class-accessors) + - [Type completeness](#type-completeness) - [Future work](#future-work) - [Temporary lifetimes](#temporary-lifetimes) - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) @@ -899,6 +900,14 @@ class HasMember { } ``` +### Type completeness + +Not a change by this proposal, but note that our existing rules will require the +type in a `ref` binding to be complete in situations where it would not need to +be if you were using a value binding with a pointer type instead. We may need to +change this in the future to match C++ which treats reference types like pointer +types for completeness purposes. + ## Future work ### Temporary lifetimes From 3e13632279a0536a5702dd87256a3a7ab9a78406 Mon Sep 17 00:00:00 2001 From: Josh L Date: Thu, 22 May 2025 17:32:27 +0000 Subject: [PATCH 30/57] Title --- proposals/p5434.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 0382678b3e912..95edd0cbd185c 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1,4 +1,4 @@ -# `ref` +# `ref` parameters, arguments, returns and `let` returns @@ -63,16 +66,14 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception reference expression in the callee. - Unlike pointers, a `ref` binding can't be rebound to a different object. 
- This replaces `addr`, and is not restricted to the `self` parameter. - - A `ref` binding, like a `let` binding, can't be used in fields of + - A `ref` binding, like a value binding, can't be used in fields of classes or structs. - When calling functions, arguments to non-`self` `ref` parameters are also marked with `ref`. -- The return of a function can optionally be marked `ref`, `let`, or `var`. +- The return of a function can optionally be marked `ref`, `val`, or `var`. These control the category of the call expression invoking the function, and how the return expression is returned. - These may be mixed for functions returning parens or brace forms. -- Any parameters whose lifetime needs to contain the lifetime of the return - must be marked `bound`. - The address of a `ref` binding is `nocapture` and `noalias`. - We mark parameters of a function that may be referenced by the return value with `bound`. @@ -304,9 +305,9 @@ fn G() -> bool { } ``` -### `ref` and `let` returns +### `ref` and `val` returns -The return of a function can optionally be marked `ref` or `let`. These control +The return of a function can optionally be marked `ref` or `val`. These control the category of the call expression invoking the function, and how the return expression is returned. @@ -332,11 +333,11 @@ fn AddFive() { AddFive(); Assert(global == 10); -fn ReturnLet() -> let i32 { +fn ReturnVal() -> val i32 { return 2; } -// ReturnLet() is a value expression. -let l: i32 = ReturnLet(); +// ReturnVal() is a value expression. +let l: i32 = ReturnVal(); fn ReturnDefault() -> i32 { return 2; @@ -348,15 +349,15 @@ var j: i32 = ReturnDefault(); - A call to a function declared `-> ref T` is a durable reference expression. The generated code for that function will return the address of a `T` object. -- A call to a function declared `-> let T` is a value expression. The function +- A call to a function declared `-> val T` is a value expression. 
The function will return the value representation of `T`. Since values have no address, the value representation may be returned in registers. - The behavior of a call to a function declared `-> T` is unchanged. It is an initializing expression, returning in place or by copy depending on the initializing representation of `T`. - The behavior of `auto` as the return type is unchanged, but now supports an - optional `ref` or `let` between the `->` and `auto`. `-> auto` continues to - return an initializing expression, `-> let auto` returns a value expression, + optional `ref` or `val` between the `->` and `auto`. `-> auto` continues to + return an initializing expression, `-> val auto` returns a value expression, and `-> ref auto` returns a durable reference expression. - Using `=>` to specify a return continues to return an initializing expression, as before. Trying to infer the expression category from the @@ -369,12 +370,12 @@ A function may have multiple returns, each with their own marker, by using a paren or brace compound return form. ```carbon -fn ParenReturn() -> (let bool, ref i32, C) { +fn ParenReturn() -> (val bool, ref i32, C) { return (true, global, {.x = 3}); } fn BraceReturn() - -> {.a: let bool, + -> {.a: val bool, .b: ref i32, .c: C} { return {.a = true, @@ -435,19 +436,23 @@ limitations. Mirroring the [paren](/docs/design/pattern_matching.md#tuple-patterns) and [brace](/docs/design/pattern_matching.md#struct-patterns) pattern forms, we also -support paren and brace return forms. Every element of these forms starts with -`->` and the kind of return (if not default). +support paren and brace return forms. + +FIXME: Replace this + +Every element of these forms starts with `->` and the kind of return (if not +default). ```carbon // Paren return form fn ParenReturn(...) - -> (->let bool, ->ref i32, -> C); + -> (->val bool, ->ref i32, -> C); let (a: bool, ref b: i32, var c: C) = ParenReturn(...); fn BraceReturn(...) 
- -> {->let .a: bool, + -> {->val .a: bool, ->ref .b: i32, -> .c: C}; @@ -469,7 +474,7 @@ let {a: bool, ``` Note that we can distinguish between returning a compound form and a tuple or -struct with one token of lookahead. Only types are allowed after a `-> let` or +struct with one token of lookahead. Only types are allowed after a `-> val` or `-> ref`, not a compound return form. Examples: ```carbon @@ -479,28 +484,28 @@ fn TupleReturn(...) -> (bool, f32, C, i32); // ❌ Invalid, every element of a compound return form must // start with `->`. -fn Invalid1(...) -> (bool, ->let f32); +fn Invalid1(...) -> (bool, ->val f32); // ❌ Invalid, can only specify return category // on innermost `->`. -fn Invalid2(...) -> let (-> bool, -> f32); +fn Invalid2(...) -> val (-> bool, -> f32); ``` To nest, use the default `->` in the outer form, as in: ```carbon fn BracesInParens(...) - -> (->{-> .a: bool, ->let .b: f32}, -> C, ->ref i32); + -> (->{-> .a: bool, ->val .b: f32}, -> C, ->ref i32); -let ({.a = var x: bool, .b = let y: f32}, +let ({.a = var x: bool, .b = val y: f32}, var c: C, ref d: i32) = BracesInParens(); // or without renaming: -let ({var a: bool, let b: f32}, +let ({var a: bool, val b: f32}, var c: C, ref d: i32) = BracesInParens(); fn ParensInBraces(...) -> {-> .a: bool, - -> .b: (->let f32, -> C), + -> .b: (->val f32, -> C), ->ref .c: i32}; let {a: bool, @@ -574,7 +579,7 @@ class ReturnMember { } // ❌ Invalid: can't return reference to value. - fn InvalidLet[bound self: Self]() -> ref i32 { + fn InvalidVal[bound self: Self]() -> ref i32 { return self.m; } @@ -592,7 +597,7 @@ class DerefPointerMember { } // ✅ Valid - fn ValidLet[bound self: Self]() -> ref i32 { + fn ValidVal[bound self: Self]() -> ref i32 { return *self.pm; } @@ -660,12 +665,12 @@ between typechecking against the signatures in an interface when the `impl` functions are different. Exception: the question of whether `bound` affects the lifetime of temporaries is [future work](#temporary-lifetimes). 
-Note that all combinations of a `lef`/`ref`/default return can be bound to a -`let`/`ref`/`var` parameter. Examples: +Note that all combinations of a `val`/`ref`/default return can be bound to a +value/`ref`/`var` parameter. Examples: ```carbon -fn RefToLet(bound ref x: C) -> let D { return x.d; } -fn LetToRef(bound y: C) -> ref D { return *y.ptr; } +fn RefToVal(bound ref x: C) -> val D { return x.d; } +fn ValToRef(bound y: C) -> ref D { return *y.ptr; } fn VarToRef(bound var p: i32*) -> ref i32 { return *p; } fn VarToDefault(bound var p: i32*) -> i32* { return p; } ``` @@ -730,7 +735,7 @@ for those cases. ### Part of the expression type system, not object types -Much like value/`let` and `var` bindings, `ref` binding and the new return forms +Much like value/`val` and `var` bindings, `ref` binding and the new return forms are part of the type system, but only through expression categories, patterns (function parameters and so on), and returns. Specifically, we don't expect them to be part of the _object types_ in Carbon. Like value bindings, we @@ -807,7 +812,7 @@ added the interfaces used to support indexing with the subscripting operator - The `addr self` parameters are changed to `bound ref self`, to allow the result to reference the `self` object. -- The `At` method returns by `let`. +- The `At` method returns by `val`. - The `Addr` methods are renamed `Ref` and return a reference instead of a pointer that is automatically dereferenced. @@ -845,7 +850,7 @@ interface BindToRef(T:! type) { } ``` -Similarly, the `BindToValue` interface is changed to use a `let`/value return, +Similarly, the `BindToValue` interface is changed to use a `val`/value return, potentially avoiding a copy of large objects. Before: @@ -862,7 +867,7 @@ After: ```carbon interface BindToValue(T:! 
type) { extend Bind(T); - fn Op[self: Self](bound x: T) -> let Result; + fn Op[self: Self](bound x: T) -> val Result; } ``` @@ -1149,8 +1154,8 @@ This question was discussed on [2025-05-07](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.sfx9d7ltud5). We decided that the marking is not about lifetime, but ability to mutate. A -`let` may reference an object in a similar way to a `ref`, restricting -operations on the original object, but we are not going to mark `let`s since +`val` may reference an object in a similar way to a `ref`, restricting +operations on the original object, but we are not going to mark `val`s since those restrictions are enforced by the compiler. We thought the ability to mutate, though, was something important enough to highlight to readers of the code, even at the expense of extra work for the writer. @@ -1168,7 +1173,65 @@ This is a "try it and see how well it works" sort of decision. ### Top-level `ref` introducer For now, we don't believe `let ref` to be so common as to need a shorter way to -write, unlike what we do for `var`. +write, unlike what we do for `var`. This was considered in +[leads issue #5523](https://github.com/carbon-language/carbon-lang/issues/5523), +which provided this rationale: + +> I feel like this would often be used for non-local mutation due to it +> fundamentally deporting mutable value semantics and instead having reference +> semantics. Unlike the local mutation, that seems more worthwhile to have +> incentives around minimizing. +> +> However, this seems the easiest of all to revisit later if we discover that +> the added verbosity in practice is costing more than is worth any improvements +> from explicitly flagging mutable reference semantics, or if we find code is +> reaching for antipatterns due to the incentive. 
+ +In addition, +[this comment on leads issue #5522](https://github.com/carbon-language/carbon-lang/issues/5522#issuecomment-2972029100) +argued that it would be more consistent for `ref` and `val` to only apply to +bindings, and not introduce patterns, like `let` and `var`. + +### Allow immutable value semantic bindings nested within variable patterns + +This was considered in +[leads issue #5523](https://github.com/carbon-language/carbon-lang/issues/5523), +which provided this rationale: + +> While it may be an obvious point of orthogonality, I think it adds choice +> without sufficient motivation, and even having that choice does add some +> complexity to the language. +> +> It also seems like we could add this later if there is sufficient demand when +> we have larger usage experience body to pull from with the rest of the Carbon +> language. Currently, the affordance that feels more natural to me is what we +> have. + +> I think we're happy to see motivating use cases and revisit this. At the +> moment, we've just not seen motivating use cases -- everything has seemed a +> bit too contrived. + +### Remove `var` as a top-level statement introducer + +This was considered in +[leads issue #5523](https://github.com/carbon-language/carbon-lang/issues/5523), +which provided this rationale: + +> Locals are important, frequent, and frequently mutable. I don't think forcing +> varying locals to go through let var for orthogonality aids readability enough +> to offset the verbosity cost of added keywords on a reasonably common pattern. +> +> I'm still currently in the position that locally owned objects being mutable +> should not be "discouraged" or "disincentivized" by the language. 
And I think +> adding artificial incentives to try and avoid needing a mutable local variable +> would either have no effect beyond verbosity, or if it did have effect, it +> wouldn't be a net positive effect due to code being written in a less +> straightforward manner in order to avoid mutation. +> +> To be clear, this is based on intuition and judgement based on my experience, +> not in any way based on data or specific motivating examples. I can imagine +> data or evidence or even a new perspective changing my position here, but so +> far the discussion we've had haven't done that. ### `ref` as a type qualifier @@ -1189,15 +1252,15 @@ cases. We also noted that including references in the type system led to a number of inconsistencies in C++, such as no there not being references of references. -### `bound` would change the default return to `let` +### `bound` would change the default return to `val` We considered saying that `bound` would change the default return to use the -`->let` return convention. This was discussed on +`->val` return convention. This was discussed on [2025-05-01](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.dffumsu6wzlc) and [2025-05-08](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.vdognq1upsf5). -The idea is that `let` is expected to be efficient, so we should encourage using -it, but we can't always use `let`, since some types have a reference value +The idea is that `val` is expected to be efficient, so we should encourage using +it, but we can't always use `val`, since some types have a reference value representation, but `bound` alleviates that concern. Once we realized that `bound` is relevant for all return conventions, we @@ -1259,30 +1322,30 @@ We considered various syntax options on but none of them seemed good enough to justify inclusion at this time: ```carbon -fn F(...) -> (->ref R, ->let L, -> V) { +fn F(...) 
-> (->ref R, ->val L, -> V) { // No longer a `var` being returned. Ideally these // shouldn't have to be initialized together. - returned ??? (ref r: R, let l: L, var v: V) = ...? + returned ??? (ref r: R, val l: L, var v: V) = ...? return var; } // We could restrict to one `var` return component, // but this is a lot of machinery for a small increase // in expressiveness and applicability. -fn F(...) -> (->ref R, ->let L, -> V) { +fn F(...) -> (->ref R, ->val L, -> V) { returned var v: V = ...; let l: L = ...; return (*r, l, var); } -fn F(...) -> (->ref R, ->let L, -> V) { +fn F(...) -> (->ref R, ->val L, -> V) { // These don't have the right category, and ideally // shouldn't have to be initialized together. returned var ret: (R, L, V) = ...? return var; } -fn F(...) -> (->ref R, ->let L, -> V) { +fn F(...) -> (->ref R, ->val L, -> V) { returned var (_, _, var v: V) = ; } ``` @@ -1290,7 +1353,7 @@ fn F(...) -> (->ref R, ->let L, -> V) { There was another approach we considered for `returned var` originally: ```carbon -fn F(...) -> (->ref R, ->let L, -> v1: V1, -> v2: V2) { +fn F(...) -> (->ref R, ->val L, -> v1: V1, -> v2: V2) { // ... // Must use the same names for `->` returns with bound names. return (r, l, v1, v2); @@ -1340,6 +1403,18 @@ We also considered an approach where compound return forms would start with and whether we want to expose users to that in cases we might be able to avoid it. +The original proposed syntax used an arrow `->` in each component of a compound +form. This avoided ambiguity, but was verbose and visually noisy. An alternative +was suggested in +[discussion on 2025-06-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52ru7ner80b4). + +FIXME + +This was then refined in +[discussion on 2025-06-16](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.rdbzk5jnin3x). 
+ +FIXME + ### `ref` parameters allow aliasing If requiring `ref` parameters to be `noalias` ends up being too restrictive, we @@ -1347,3 +1422,7 @@ could instead have the "move-in-move-out" optimization be done only when the compiler can prove it safe. One strategy would be to generate an alternate version of the function that is only used in the cases where the `noalias` conditions can be shown to hold statically. + +### `let` to mark value returns instead of `val` + +FIXME: https://github.com/carbon-language/carbon-lang/issues/5522 From 38f0f90492d14fb84efb5e1a8c41774a3aa46fac Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 00:34:18 +0000 Subject: [PATCH 42/57] Checkpoint progress. --- proposals/p5434.md | 57 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 11e833deb45e9..2799d429a63a8 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -21,6 +21,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Details](#details) - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [Nested binding patterns](#nested-binding-patterns) + - [Mututation restriction on objects bound to a value](#mututation-restriction-on-objects-bound-to-a-value) - [`bound` parameters](#bound-parameters) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) @@ -543,6 +544,53 @@ let ref i: i32 = v.1; Note that `ref` is disallowed inside `var` since that would be redundant. +### Mututation restriction on objects bound to a value + +FIXME: https://github.com/carbon-language/carbon-lang/issues/5524 + +1. Are we comfortable requiring no mutation of objects with a non-copy-value + representation and an active `let` binding? +2. Do we expect our safety story to enforce this at compile time with our + strictly safe dialect? +3. 
Are we comfortable making this erroneous behavior when we are unable to or + choose not to enforce this requirement at compile time? +4. Are we comfortable deferring the decision of whether strict enforcement of + this property is enabled in our current C++-friendly mode until our safety + design is complete? + +Discussed 2025-05-22 +https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.uot97ukynlsi + +- a) I think we should _initially_ make mutation of objects with a + non-copy-value representation in an active `let` binding erroneous behavior. +- b) I don't think we should ever make it anything more than erroneous + behavior, because we don't need to. See my note on erroneous behavior below. + If we can prove the mutation doesn't occur, then we can use that under + "as-if", and we don't need anything else. +- c) I think we will always have potential for unchecked code that _could_ in + theory mutate due to unsafe code and C++ interop. This goes to my point + about erroneous behavior. +- d) I think we should try to enforce the lack of mutation in the strict safe + dialect. + - We should only relax our stance here and consider making such mutation + _allowed_ if we discover difficulty with this that we cannot overcome. + - But we _should_ revisit the underlying idea of mutation being erroneous + if enforcing it in the strict mode proves fundamentally untenable due to + ergonomic costs. +- e) I am very comfortable deferring the decision of whether strict + enforcement is enabled in our current C++-friendly mode. + +--- + +On erroneous behavior: I don't think we should optimize on erroneous behavior, +ever, unless the compiler literally proves that it does not occur, ever. In +which case, we don't need any license to start optimizing on this, as it falls +under "as-if". Which means I don't think we should optimize on erroneous +behavior, ever. We should _only_ optimize on "as-if". 
The fact that UB provides +"as-if" _without_ a proof is precisely the risk of using UB for any semantics, +and why we don't use it here and elsewhere we use erroneous behavior. I should +probably write this up as a principle. + ### `bound` parameters It is an error to return a reference to a temporary object that won't live once @@ -1199,7 +1247,7 @@ This was considered in which provided this rationale: > While it may be an obvious point of orthogonality, I think it adds choice -> without sufficient motivation, and even having that choice does add some +> without sufficient motivation, and even _having_ that choice does add some > complexity to the language. > > It also seems like we could add this later if there is sufficient demand when @@ -1218,13 +1266,14 @@ This was considered in which provided this rationale: > Locals are important, frequent, and frequently mutable. I don't think forcing -> varying locals to go through let var for orthogonality aids readability enough -> to offset the verbosity cost of added keywords on a reasonably common pattern. +> varying locals to go through `let var` for orthogonality aids readability +> enough to offset the verbosity cost of added keywords on a reasonably common +> pattern. > > I'm still currently in the position that locally owned objects being mutable > should not be "discouraged" or "disincentivized" by the language. And I think > adding artificial incentives to try and avoid needing a mutable local variable -> would either have no effect beyond verbosity, or if it did have effect, it +> would either have no effect beyond verbosity, or if it _did_ have effect, it > wouldn't be a net positive effect due to code being written in a less > straightforward manner in order to avoid mutation. > From 7e688a37c33a899128780ba84576a0efc27a603f Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 21:04:02 +0000 Subject: [PATCH 43/57] Checkpoint progress. 
--- proposals/p5434.md | 232 +++++++++++++++++++++++++++++---------------- 1 file changed, 150 insertions(+), 82 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 2799d429a63a8..3e7df8a52cf4e 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -22,6 +22,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Compound return forms and patterns](#compound-return-forms-and-patterns) - [Nested binding patterns](#nested-binding-patterns) - [Mututation restriction on objects bound to a value](#mututation-restriction-on-objects-bound-to-a-value) + - [No optimization on erroneous behavior](#no-optimization-on-erroneous-behavior) - [`bound` parameters](#bound-parameters) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - [Improved C++ interop and migration](#improved-c-interop-and-migration) @@ -74,7 +75,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - The return of a function can optionally be marked `ref`, `val`, or `var`. These control the category of the call expression invoking the function, and how the return expression is returned. - - These may be mixed for functions returning parens or brace forms. + - These may be mixed for functions returning tuple or struct forms. - The address of a `ref` binding is `nocapture` and `noalias`. - We mark parameters of a function that may be referenced by the return value with `bound`. @@ -368,14 +369,14 @@ var j: i32 = ReturnDefault(); adopted in the future, if the simple rule proves inadequate. A function may have multiple returns, each with their own marker, by using a -paren or brace compound return form. +tuple or struct compound return form. ```carbon -fn ParenReturn() -> (val bool, ref i32, C) { +fn TupleReturn() -> (val bool, ref i32, C) { return (true, global, {.x = 3}); } -fn BraceReturn() +fn StructReturn() -> {.a: val bool, .b: ref i32, .c: C} { @@ -435,83 +436,145 @@ limitations. 
### Compound return forms and patterns -Mirroring the [paren](/docs/design/pattern_matching.md#tuple-patterns) and -[brace](/docs/design/pattern_matching.md#struct-patterns) pattern forms, we also -support paren and brace return forms. +Mirroring the [tuple](/docs/design/pattern_matching.md#tuple-patterns) and +[struct](/docs/design/pattern_matching.md#struct-patterns) pattern forms, we +also support tuple and struct return forms. -FIXME: Replace this +```carbon +// `->` begins a "return form" +fn F() -> ; + +// Within any return form, if the first token is +// `val`, `ref`, `var`, `(`, or `{`, it is not +// treated as type expression: + +// Value return, with type as specified +fn Val() -> val + +// Reference return, with type as specified +fn Ref() -> ref + +// Initializing return, with type as specified +fn Var() -> var + +// Tuple compound return, with a list of +// return forms. +fn TupleCompound() -> ( , ... ) +// Tuple return, with a list of type +// expressions. Used if all members of the +// list are type expressions. +fn Tuple() -> ( , ... ) + +// Struct compound return, with a mapping from +// designators to return forms. +fn StructCompound() + -> { .: , ... } +// Struct return, used if all of the members +// are type expressions. +fn Struct() -> { .: , ... } + +// Otherwise, implicit `var` means returns +// an initializing expression. +fn Other() -> +``` -Every element of these forms starts with `->` and the kind of return (if not -default). +Note that in the absence of `val`, `ref`, and `var` keywords, the implicit `var` +is placed in the outermost position, minimizing the number of returns. So +`fn F() -> (i32, i32)` means `fn F() -> var (i32, i32)` not +`fn F() -> (var i32, var i32)`. Generally the `var` is left off if not required, +and so will be rare in return forms, to minimize confusion with `val`. ```carbon -// Paren return form -fn ParenReturn(...) - -> (->val bool, ->ref i32, -> C); +fn TupleReturn(...) 
+ -> (val bool, ref i32, C); +// Equivalent to: +// -> (val bool, ref i32, var C); let (a: bool, ref b: i32, var c: C) - = ParenReturn(...); + = TupleReturn(...); -fn BraceReturn(...) - -> {->val .a: bool, - ->ref .b: i32, - -> .c: C}; +fn StructReturn(...) + -> {.a: val bool, + .b: ref i32, + .c: C}; +// Equivalent to: +// -> {.a: val bool, +// .b: ref i32, +// .c: var C}; // Binds to the names `x`, `y`, `z`: let {.a = x: bool, .b = ref y: i32, - .c = var z: C} = BraceReturn(...); + .c = var z: C} = StructReturn(...); // Binds to the names `a`, `b`, `c`: let {a: bool, ref b: i32, - var c: C} = BraceReturn(...); + var c: C} = StructReturn(...); // Above two can be mixed, binding to // names `a`, `y`, `z`. let {a: bool, .b = ref y: i32 - .c = var z: C} = BraceReturn(...); + .c = var z: C} = StructReturn(...); ``` -Note that we can distinguish between returning a compound form and a tuple or -struct with one token of lookahead. Only types are allowed after a `-> val` or -`-> ref`, not a compound return form. Examples: +Only types are allowed after a `-> val`, `-> ref`, or `-> var`, not a compound +return form. Examples: ```carbon // Returns a tuple of type // `(bool, f32, C, i32)`. -fn TupleReturn(...) -> (bool, f32, C, i32); +fn OneTupleReturn(...) + -> (bool, f32, C, i32); -// ❌ Invalid, every element of a compound return form must -// start with `->`. -fn Invalid1(...) -> (bool, ->val f32); +// Returns a compound tuple form +fn CompoundReturn(...) + -> (bool, val f32); +// Equivalent to: +// -> (var bool, val f32); -// ❌ Invalid, can only specify return category -// on innermost `->`. -fn Invalid2(...) -> val (-> bool, -> f32); +// ❌ Invalid, can't specify `ref` inside +// of `val`. +fn Invalid(...) -> val (bool, ref f32); ``` -To nest, use the default `->` in the outer form, as in: +The compound return forms may be nested, as in: ```carbon -fn BracesInParens(...) - -> (->{-> .a: bool, ->val .b: f32}, -> C, ->ref i32); +fn CompoundInParens(...) 
+ -> ({.a: bool, .b: val f32}, C, ref i32); +// Equivalent to: +// -> ({.a: var bool, .b: val f32}, var C, ref i32); let ({.a = var x: bool, .b = val y: f32}, - var c: C, ref d: i32) = BracesInParens(); + var c: C, ref d: i32) = CompoundInParens(...); // or without renaming: -let ({var a: bool, val b: f32}, - var c: C, ref d: i32) = BracesInParens(); +let ({var a: bool, b: f32}, + var c: C, ref d: i32) = CompoundInParens(...); -fn ParensInBraces(...) - -> {-> .a: bool, - -> .b: (->val f32, -> C), - ->ref .c: i32}; +// Contrast with a compound tuple form containing +// a struct type (not compound): +fn StructInParens(...) + -> ({.a: bool, .b: f32}, C, ref i32); +// Equivalent to: +// -> (var {.a: bool, .b: f32}, var C, ref i32); + +fn CompoundInBraces(...) + -> {.a: bool, .b: (val f32, C), .c: ref i32}; +// Equivalent to: + -> {.a: var bool, .b: (val f32, var C), .c: ref i32}; let {a: bool, .b = (x: f32, var y: C), ref c: i32} = ParensInBraces(...); + +// Contrast with a compound struct form containing +// a tuple type (not compound): +fn TupleInBraces(...) + -> {.a: bool, .b: (f32, C), .c: ref i32}; +// Equivalent to: + -> {.a: var bool, .b: var (f32, C), .c: ref i32}; ``` This feature is intended to support cases like `enumerate` that will want to @@ -546,50 +609,43 @@ Note that `ref` is disallowed inside `var` since that would be redundant. ### Mututation restriction on objects bound to a value -FIXME: https://github.com/carbon-language/carbon-lang/issues/5524 - -1. Are we comfortable requiring no mutation of objects with a non-copy-value - representation and an active `let` binding? -2. Do we expect our safety story to enforce this at compile time with our - strictly safe dialect? -3. Are we comfortable making this erroneous behavior when we are unable to or - choose not to enforce this requirement at compile time? -4. 
Are we comfortable deferring the decision of whether strict enforcement of - this property is enabled in our current C++-friendly mode until our safety - design is complete? - -Discussed 2025-05-22 -https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.uot97ukynlsi - -- a) I think we should _initially_ make mutation of objects with a - non-copy-value representation in an active `let` binding erroneous behavior. -- b) I don't think we should ever make it anything more than erroneous - behavior, because we don't need to. See my note on erroneous behavior below. - If we can prove the mutation doesn't occur, then we can use that under - "as-if", and we don't need anything else. -- c) I think we will always have potential for unchecked code that _could_ in - theory mutate due to unsafe code and C++ interop. This goes to my point - about erroneous behavior. -- d) I think we should try to enforce the lack of mutation in the strict safe +Mutation of objects with a non-copy-value representation in an active `let` +binding ("borrowed objects") is erroneous behavior. + +- Our plan is to prevent mutation of borrowed objects in Carbon's strict safe dialect. - We should only relax our stance here and consider making such mutation _allowed_ if we discover difficulty with this that we cannot overcome. - But we _should_ revisit the underlying idea of mutation being erroneous if enforcing it in the strict mode proves fundamentally untenable due to ergonomic costs. -- e) I am very comfortable deferring the decision of whether strict - enforcement is enabled in our current C++-friendly mode. - ---- - -On erroneous behavior: I don't think we should optimize on erroneous behavior, -ever, unless the compiler literally proves that it does not occur, ever. In -which case, we don't need any license to start optimizing on this, as it falls -under "as-if". Which means I don't think we should optimize on erroneous -behavior, ever. 
We should _only_ optimize on "as-if". The fact that UB provides -"as-if" _without_ a proof is precisely the risk of using UB for any semantics, -and why we don't use it here and elsewhere we use erroneous behavior. I should -probably write this up as a principle. +- There will always be the potential for unchecked code, either unsafe Carbon + code or C++ code by way of interop, to mutate a borrowed object, hence the + need to define it as erroneous behavior. +- There is no need to ever make it anything more than erroneous behavior, see + below. +- If we can prove the mutation doesn't occur, then we can use that to optimize + under "as-if", and we don't need anything else. +- We are deferring the decision of whether strict enforcement is enabled in + Carbon's current C++-friendly mode, when not explicitly marking the code as + "unsafe." + +This was +[discussed 2025-05-22](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.uot97ukynlsi) +and then made the subject of +[leads issue #5524](https://github.com/carbon-language/carbon-lang/issues/5524). + +### No optimization on erroneous behavior + +The Carbon compiler should not optimize on erroneous behavior, ever, unless the +compiler literally proves that it does not occur, ever. In which case, we don't +need any license to start optimizing on this, as it falls under "as-if". + +The fact that undefined behavior ("UB", +[cppreference](https://en.cppreference.com/w/cpp/language/ub.html), +[wikipedia](https://en.wikipedia.org/wiki/Undefined_behavior)) provides "as-if" +_without_ a proof is precisely the risk of using UB for any semantics, and why +we don't use it here and elsewhere we use erroneous behavior. ### `bound` parameters @@ -1436,7 +1492,7 @@ We considered other options for the syntax of compound return forms on and [2025-05-14](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52tb7l2he343). 
The option of omitting the `->` in each component did not distinguish tuples -from paren return forms sufficiently: +from tuple return forms sufficiently: ```carbon -> (ref i32, var i32) @@ -1453,8 +1509,20 @@ and whether we want to expose users to that in cases we might be able to avoid it. The original proposed syntax used an arrow `->` in each component of a compound -form. This avoided ambiguity, but was verbose and visually noisy. An alternative -was suggested in +form. + +```carbon +fn TupleReturn(...) + -> (->val bool, ->ref i32, -> C); + +fn StructReturn(...) + -> {->val .a: bool, + ->ref .b: i32, + -> .c: C}; +``` + +This avoided ambiguity, but was verbose and visually noisy. An alternative was +suggested in [discussion on 2025-06-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52ru7ner80b4). FIXME From 7787bd04e41bfc98035304622b43a3eea36d5a7b Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 21:34:31 +0000 Subject: [PATCH 44/57] Checkpoint progress. --- proposals/p5434.md | 58 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 14 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 3e7df8a52cf4e..f3431456cee6d 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -58,6 +58,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Other syntax for compound return forms](#other-syntax-for-compound-return-forms) - [`ref` parameters allow aliasing](#ref-parameters-allow-aliasing) - [`let` to mark value returns instead of `val`](#let-to-mark-value-returns-instead-of-val) + - [`=>` infers form, not just type](#-infers-form-not-just-type) @@ -341,11 +342,20 @@ fn ReturnVal() -> val i32 { // ReturnVal() is a value expression. let l: i32 = ReturnVal(); +// Returning an initializing expression is the +// default. fn ReturnDefault() -> i32 { return 2; } // ReturnDefault() is an initializing expression. 
var j: i32 = ReturnDefault(); + +// Use `var` to explicitly specify returning an +// initializing expression. +fn ReturnVar() -> var i32 { + return 2; +} +// `ReturnVar()` is the same as `ReturnDefault()`. ``` - A call to a function declared `-> ref T` is a durable reference expression. @@ -356,17 +366,15 @@ var j: i32 = ReturnDefault(); the value representation may be returned in registers. - The behavior of a call to a function declared `-> T` is unchanged. It is an initializing expression, returning in place or by copy depending on the - initializing representation of `T`. + initializing representation of `T`. This is the same behavior as `-> var T`. - The behavior of `auto` as the return type is unchanged, but now supports an - optional `ref` or `val` between the `->` and `auto`. `-> auto` continues to - return an initializing expression, `-> val auto` returns a value expression, - and `-> ref auto` returns a durable reference expression. + optional `ref`, `val`, or `var` between the `->` and `auto`. `-> auto` + continues to return an initializing expression, as does `-> var auto`. + `-> val auto` returns a value expression, and `-> ref auto` returns a + durable reference expression. - Using `=>` to specify a return continues to return an initializing - expression, as before. Trying to infer the expression category from the - category of the expression after the `=>` runs into the problem of this - often requiring the parameters to be marked `bound`. A more complicated - rule, for example using whether any parameter is marked `bound`, could be - adopted in the future, if the simple rule proves inadequate. + expression, as before. See + [this relevant alternative considered](#-infers-form-not-just-type). A function may have multiple returns, each with their own marker, by using a tuple or struct compound return form. @@ -1524,13 +1532,19 @@ fn StructReturn(...) This avoided ambiguity, but was verbose and visually noisy. 
An alternative was suggested in [discussion on 2025-06-13](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.52ru7ner80b4). +This alternative used a default of compound returns for paren `(`...`)` and +brace `{`...`}` expressions, which you could opt out of by using one of the +three category keywords such as `var` to introduce a type expression that would +not be considered a compound return. -FIXME - -This was then refined in -[discussion on 2025-06-16](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.rdbzk5jnin3x). +This had the downside that `-> T` would be interpreted as `-> var T` even if `T` +was a tuple type like `(i32, i64)`. However, textually substituting +`(i32, i64)` in for `T` to get `-> (i32, i64)` would instead be interpreted as +`-> (var i32, var i64)`. -FIXME +To overcome this problem, in +[discussion on 2025-06-16](https://docs.google.com/document/d/1Yt-i5AmF76LSvD4TrWRIAE_92kii6j5yFiW-S7ahzlg/edit?tab=t.0#heading=h.rdbzk5jnin3x), +we switched to the approach from this proposal. ### `ref` parameters allow aliasing @@ -1542,4 +1556,20 @@ conditions can be shown to hold statically. ### `let` to mark value returns instead of `val` +This proposal initially used `let` instead of `val` to mark immutable value +returns. [`ref` and `val` returns](#ref-and-val-returns) + FIXME: https://github.com/carbon-language/carbon-lang/issues/5522 + +### `=>` infers form, not just type + +There was support for the idea that the `=>` return syntax introduced in +[proposal #3848](https://github.com/carbon-language/carbon-lang/pull/3848) +should deduce the form of the return, not just its type. This was discussed in +[#lambdas on 2025-05-20](https://discord.com/channels/655572317891461132/999638000126394370/1374462658723450981). 
+ +However, trying to infer the expression category from the category of the +expression after the `=>` runs into the problem of this often requiring the +parameters to be marked `bound`. A more complicated rule, for example using +whether any parameter is marked `bound`, could be adopted in the future, if the +simple rule proves inadequate. From 37b5c8445cde9fea6ffc9178028b11a0df29e3b5 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 21:40:40 +0000 Subject: [PATCH 45/57] Checkpoint progress. --- proposals/p5434.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/proposals/p5434.md b/proposals/p5434.md index f3431456cee6d..1db730ef074df 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1573,3 +1573,7 @@ expression after the `=>` runs into the problem of this often requiring the parameters to be marked `bound`. A more complicated rule, for example using whether any parameter is marked `bound`, could be adopted in the future, if the simple rule proves inadequate. + +Alternatively, the compiler could infer which parameters should be marked +`bound` in this case. That is something to consider with the memory safety +design. From a11e69fe3b18c551083a98fcc3e507fd42a06f4d Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 21:59:41 +0000 Subject: [PATCH 46/57] Checkpoint progress. --- proposals/p5434.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 1db730ef074df..c1f4a6b5256ef 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1556,10 +1556,18 @@ conditions can be shown to hold statically. ### `let` to mark value returns instead of `val` -This proposal initially used `let` instead of `val` to mark immutable value -returns. 
[`ref` and `val` returns](#ref-and-val-returns) - -FIXME: https://github.com/carbon-language/carbon-lang/issues/5522 +This proposal initially used `let` instead of `val` to mark +[immutable value returns](#ref-and-val-returns). However, `let` is used, in +Carbon and other languages, primarily to bind names. In Carbon, the default +binding is a value binding, but that was not considered close enough to +connect the `let` keyword to value semantics. + +There was also a concern about reusing `let` in multiple contexts to mean +different things, and having separate keywords that were only used to mark the +category of the binding was deemed better separation of concerns. + +This question was considered in +[leads issue #5522](https://github.com/carbon-language/carbon-lang/issues/5522). ### `=>` infers form, not just type From c8e37f3ab968a7cae884b89f6eac494ee8552a19 Mon Sep 17 00:00:00 2001 From: Josh L Date: Wed, 18 Jun 2025 22:06:26 +0000 Subject: [PATCH 47/57] `init` --- proposals/p5434.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/proposals/p5434.md b/proposals/p5434.md index c1f4a6b5256ef..978315b6f8bea 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1566,6 +1566,14 @@ There was also a concern about reusing `let` in multiple contexts to mean different things, and having separate keywords that were only used to mark the category of the binding was deemed better separation of concerns. +We considered making a parallel change to use `init` instead of `var`, but this +had some problems: + +- By making initializing returns the default, there is little expected usage, + so perhaps not worth spending another keyword on. +- The `init` keyword would be particularly expensive, because C++ code + commonly uses that word in APIs. + This question was considered in [leads issue #5522](https://github.com/carbon-language/carbon-lang/issues/5522). 
From eb1df53f4e9d46f56fee4a15d4d0e0a20e953f77 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 1 Jul 2025 23:39:09 +0000 Subject: [PATCH 48/57] Update custom value representation syntax to use `ref` keyword --- docs/design/values.md | 7 +++++-- proposals/p5434.md | 7 +++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/design/values.md b/docs/design/values.md index b1a148913c1d7..dea4089ee9732 100644 --- a/docs/design/values.md +++ b/docs/design/values.md @@ -640,6 +640,9 @@ specialized constructs given the specialized nature of these operations. ### Reference types +TODO: This section needs to be updated to reflect +[proposal #5434](/proposals/p5434.md). + Unlike C++, Carbon does not currently have reference types. The only form of indirect access are pointers. There are a few aspects to this decision that need to be separated carefully from each other as the motivations and considerations @@ -883,8 +886,8 @@ keyword. It isn't final at all and likely will need to change to read well. The provided representation type must be one of the following: - `const Self` -- this forces the use of a _copy_ of the object. -- `const Self *` -- this forces the use of a [_pointer_](#pointers) to the - original object. +- `ref` -- this forces the use of a [_pointer_](#pointers) to the original + object. - A custom type that is not `Self`, `const Self`, or a pointer to either. 
If the representation is `const Self` or `const Self *`, then the type fields diff --git a/proposals/p5434.md b/proposals/p5434.md index 978315b6f8bea..8a61a3dc76879 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -33,6 +33,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [Use case: member binding interfaces](#use-case-member-binding-interfaces) - [Use case: class accessors](#use-case-class-accessors) - [Type completeness](#type-completeness) + - [Pointer value representation](#pointer-value-representation) - [Future work](#future-work) - [Temporary lifetimes](#temporary-lifetimes) - [`ref` bindings in lambdas](#ref-bindings-in-lambdas) @@ -1056,6 +1057,12 @@ be if you were using a value binding with a pointer type instead. We may need to change this in the future to match C++ which treats reference types like pointer types for completeness purposes. +### Pointer value representation + +Purely as a change in syntax, the way to specify that +[the value representation of a class](/docs/design/values.md#value-representation-and-customization) +uses a pointer is changed from writing `const Self *` to `ref`. + ## Future work ### Temporary lifetimes From 9ec8a8510b0da6eb8325e2a60d604f1be63c1c06 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 1 Jul 2025 23:54:24 +0000 Subject: [PATCH 49/57] Add text to hopefully address confusion, as suggested in review --- proposals/p5434.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 8a61a3dc76879..c83c816342896 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1054,8 +1054,12 @@ class HasMember { Not a change by this proposal, but note that our existing rules will require the type in a `ref` binding to be complete in situations where it would not need to be if you were using a value binding with a pointer type instead. 
We may need to -change this in the future to match C++ which treats reference types like pointer -types for completeness purposes. +change this in the future to match C++ which treats reference types more like +pointer types for completeness purposes. + +After this change, a `ref` binding to type `T` will require `T` to be complete +in the same situations that other bindings to type `T` require `T` to be +complete. ### Pointer value representation From 50417f48d1aeb7c9a17901a71aeb29110022068d Mon Sep 17 00:00:00 2001 From: josh11b <15258583+josh11b@users.noreply.github.com> Date: Tue, 8 Jul 2025 07:20:21 -0700 Subject: [PATCH 50/57] Apply suggestions from code review Co-authored-by: Chandler Carruth --- docs/design/expressions/indexing.md | 6 +++--- docs/design/values.md | 4 ++-- proposals/p5434.md | 21 +++++++++++---------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/design/expressions/indexing.md b/docs/design/expressions/indexing.md index 74c7364cffe47..1fb95b7c1ef2b 100644 --- a/docs/design/expressions/indexing.md +++ b/docs/design/expressions/indexing.md @@ -54,7 +54,7 @@ Its semantics are defined in terms of the following interfaces: ``` interface IndexWith(SubscriptType:! type) { let ElementType:! type; - fn At[bound self: Self](subscript: SubscriptType) -> let ElementType; + fn At[bound self: Self](subscript: SubscriptType) -> val ElementType; fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType; } @@ -84,7 +84,7 @@ final impl forall [SubscriptType:! type, T:! 
IndirectIndexWith(SubscriptType)] T as IndexWith(SubscriptType) { where ElementType = T.(IndirectIndexWith(SubscriptType).ElementType); - fn At[bound self: Self](subscript: SubscriptType) -> let ElementType { + fn At[bound self: Self](subscript: SubscriptType) -> val ElementType { return self.(IndirectIndexWith(SubscriptType).Ref)(index); } fn Ref[bound ref self: Self](subscript: SubscriptType) -> ref ElementType { @@ -104,7 +104,7 @@ An array type could implement subscripting like so: class Array(template T:! type) { impl as IndexWith(like i64) { let ElementType:! type = T; - fn At[bound self: Self](subscript: i64) -> let T; + fn At[bound self: Self](subscript: i64) -> val T; fn Ref[bound ref self: Self](subscript: i64) -> ref T; } } diff --git a/docs/design/values.md b/docs/design/values.md index dea4089ee9732..99880cc8bd540 100644 --- a/docs/design/values.md +++ b/docs/design/values.md @@ -886,8 +886,8 @@ keyword. It isn't final at all and likely will need to change to read well. The provided representation type must be one of the following: - `const Self` -- this forces the use of a _copy_ of the object. -- `ref` -- this forces the use of a [_pointer_](#pointers) to the original - object. +- `const ref` -- this forces the use of a [_pointer_](#pointers) to the + original object, but with the `const` API subset. - A custom type that is not `Self`, `const Self`, or a pointer to either. If the representation is `const Self` or `const Self *`, then the type fields diff --git a/proposals/p5434.md b/proposals/p5434.md index c83c816342896..77afffa7457de 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1237,7 +1237,7 @@ expression into pieces loses the tools the expression provided to you. The missing tool for that was reference bindings. We saw how much we were leaning on value bindings. 
The asymmetry between having -value binding but not referenced bindings when have value expressions and +value binding but not reference bindings when we have value expressions and reference expressions was creating pressure. For example, when accessing members of an object, we had to escape to pointers in that operator. @@ -1446,7 +1446,7 @@ We considered various syntax options on but none of them seemed good enough to justify inclusion at this time: ```carbon -fn F(...) -> (->ref R, ->val L, -> V) { +fn F(...) -> (ref R, val L, V) { // No longer a `var` being returned. Ideally these // shouldn't have to be initialized together. returned ??? (ref r: R, val l: L, var v: V) = ...? @@ -1456,20 +1456,20 @@ fn F(...) -> (->ref R, ->val L, -> V) { // We could restrict to one `var` return component, // but this is a lot of machinery for a small increase // in expressiveness and applicability. -fn F(...) -> (->ref R, ->val L, -> V) { +fn F(...) -> (ref R, val L, V) { returned var v: V = ...; let l: L = ...; return (*r, l, var); } -fn F(...) -> (->ref R, ->val L, -> V) { +fn F(...) -> (ref R, val L, V) { // These don't have the right category, and ideally // shouldn't have to be initialized together. returned var ret: (R, L, V) = ...? return var; } -fn F(...) -> (->ref R, ->val L, -> V) { +fn F(...) -> (ref R, val L, V) { returned var (_, _, var v: V) = ; } ``` @@ -1477,9 +1477,10 @@ fn F(...) -> (->ref R, ->val L, -> V) { There was another approach we considered for `returned var` originally: ```carbon -fn F(...) -> (->ref R, ->val L, -> v1: V1, -> v2: V2) { +fn F(...) -> (ref R, val L, v1: V1, var v2: V2) { // ... - // Must use the same names for `->` returns with bound names. + // Must use the same names for the `var` (implicit or explicit) returns + // with bound names. return (r, l, v1, v2); } ``` @@ -1493,11 +1494,11 @@ But this had downsides that still apply: Our current approach handles our main use case for `returned var`: factory functions. 
-We could support an "only `->`s" approach in the future if we want: +We could support an "only `var`s" approach in the future if we want: ```carbon -fn F(...) -> (-> V1, -> V2, -> V3) { - returned var (v1: V1, v2: V2, v3: V3) = ...; +fn F(...) -> (var V1, var V2, var V3) { + returned (var v1: V1, var v2: V2, var v3: V3) = ...; // ... return var; } From 08f332b0c8b3fe8a8c1ee99c8e2e73e5975bf974 Mon Sep 17 00:00:00 2001 From: Josh L Date: Tue, 8 Jul 2025 14:23:09 +0000 Subject: [PATCH 51/57] Checkpoint progress. --- proposals/p5434.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 77afffa7457de..7f5b20f30a975 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -1065,7 +1065,7 @@ complete. Purely as a change in syntax, the way to specify that [the value representation of a class](/docs/design/values.md#value-representation-and-customization) -uses a pointer is changed from writing `const Self *` to `ref`. +uses a pointer is changed from writing `const Self *` to `const ref`. ## Future work From 1f5129aea0033bde4e1e6ebdfac1afc99c1cb198 Mon Sep 17 00:00:00 2001 From: Josh L Date: Sun, 20 Jul 2025 18:49:14 +0000 Subject: [PATCH 52/57] Checkpoint progress. --- docs/design/values.md | 12 ++++++------ proposals/p5434.md | 14 ++++++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/design/values.md b/docs/design/values.md index 99880cc8bd540..d810d90fb37e2 100644 --- a/docs/design/values.md +++ b/docs/design/values.md @@ -414,9 +414,9 @@ the available implementation strategies. > **Future work:** The interaction between a > [custom value representation](#value-representation-and-customization) and a > value expression used with a polymorphic type needs to be fully captured. -> Either it needs to restrict to a `const Self*` style representation (to -> prevent slicing) or it needs to have a model for the semantics when a -> different value representation is used. 
+> Either it needs to restrict to a `const ref` style representation (to prevent +> slicing) or it needs to have a model for the semantics when a different value +> representation is used. ### Interop with C++ `const &` and `const` methods @@ -890,8 +890,8 @@ The provided representation type must be one of the following: original object, but with the `const` API subset. - A custom type that is not `Self`, `const Self`, or a pointer to either. -If the representation is `const Self` or `const Self *`, then the type fields -will be accessible as [_value expressions_](#value-expressions) using the normal +If the representation is `const Self` or `const ref`, then the type fields will +be accessible as [_value expressions_](#value-expressions) using the normal member access syntax for value expressions of a type. These will be implemented by either accessing a copy of the object in the non-pointer case or a pointer to the original object in the pointer case. A representation of `const Self` @@ -902,7 +902,7 @@ used. If no customization is provided, the implementation will select one based on a set of heuristics. Some examples: -- Non-copyable types and polymorphic types would use a `const Self*`. +- Non-copyable types and polymorphic types would use a `const ref`. - Small objects that are trivially copied in a machine register would use `const Self`. diff --git a/proposals/p5434.md b/proposals/p5434.md index 7f5b20f30a975..3306dba7814b1 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -78,6 +78,8 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception These control the category of the call expression invoking the function, and how the return expression is returned. - These may be mixed for functions returning tuple or struct forms. +- Any parameters whose lifetime needs to contain the lifetime of the return + must be marked `bound`. - The address of a `ref` binding is `nocapture` and `noalias`. 
- We mark parameters of a function that may be referenced by the return value with `bound`. @@ -271,16 +273,20 @@ Assert(c.Get() == 7); Normally an argument to a non-`ref` parameter should not be marked `ref`, but it is allowed in a generic context where the parameter may sometimes be `ref`. -Operators will mostly not take `ref` parameters, with these exceptions: +Expression operators will mostly not take `ref` parameters, with these +exceptions: - [the address-of operator](/docs/design/expressions/pointer_operators.md) `&`; - the first operand of - [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; + [the indexing operator](/docs/design/expressions/indexing.md) `[`...`]`; and - [the member access operator](#use-case-member-binding-interfaces) introduced in [proposal #3720: "Member binding operators"](https://github.com/carbon-language/carbon-lang/pull/3720) - `.`; and + `.`. + +The statement operators now use `ref` instead of pointers: + - the left-hand operand of [assignment operators](/docs/design/assignment.md) such as `=` and `+=`. - [the `++` and `--` operators](/docs/design/assignment.md). @@ -896,7 +902,7 @@ interface Deref { final impl forall [T:! type] T* as Deref { where Result = T; - fn Op[bound ref self: Self]() -> ref T + fn Op[bound self: Self]() -> ref T = "builtin.deref"; } ``` From 90d4ec496291bed80c599ef986b2fa0d7d8b5660 Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 21 Jul 2025 16:49:54 +0000 Subject: [PATCH 53/57] Implement suggestions --- proposals/p5434.md | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 3306dba7814b1..5b7339d10b72e 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -135,7 +135,7 @@ won't. which is governed by [pointer capture rules](https://llvm.org/docs/LangRef.html#pointer-capture). 
- Clang allows C++ code to use the - [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#id8) + [`clang::lifetimebound` attribute](https://clang.llvm.org/docs/AttributeReference.html#lifetimebound) to mark parameters that may be referenced by the return value, in order to detect some classes of use-after-free memory-safety bugs. - [C++ has reference types](https://en.cppreference.com/w/cpp/language/reference). @@ -664,8 +664,9 @@ we don't use it here and elsewhere we use erroneous behavior. ### `bound` parameters -It is an error to return a reference to a temporary object that won't live once -the function returns, even if it is a parameter marked `bound`. +It is erroneous behavior to return something that references a local object that +won't live once the function returns, even if it is a parameter marked `bound`. +Local objects include local variables, temporary objects, and `var` parameters. ```carbon fn Invalid1() -> i32* { @@ -815,11 +816,12 @@ The address of a `ref` binding is `noalias` and either `captures(none)` or - `captures(ret: address, provenance)`: is like `captures(none)` but may be referenced by a return. -The `noalias` semantics are the minimum for the "move-in-move-out" optimization. -But this condition is hard to check, so safe code will use a stricter criteria. -Unsafe code will be required to adhere to the looser `noalias` restrictions, but -will not be checked (except possibly by a sanitizer at runtime). The details -here will be tackled as part of the memory safety design. +The combination of `noalias` and `captures(none)` semantics is the minimum for +the "move-in-move-out" optimization. But this condition is hard to check, so +safe code will use stricter criteria. Unsafe code will be required to adhere +to just the `noalias` restrictions, but will not be checked (except possibly by +a sanitizer at runtime). The details here will be tackled as part of the memory +safety design. 
Optimizations will only be performed based on information that is enforced or checked by the compiler, so these attributes won't be passed to LLVM unless @@ -869,20 +871,27 @@ handle `ref`. ### Interaction with `returned var` -The rule is: `returned var` may only be used when there is a single component to -the return form, and it is default `->`. +The rule is: `returned var` may only be used when there is a single atomic +return form, and it is the default `var` category. ```carbon +// ✅ Allowed fn F(...) -> V { returned var v: V = ...; // ... return var; } -fn F(...) -> {-> .a : T} { +fn F(...) -> {var .a : T} { + // ❌ Invalid: composite form + returned var ret: T = ...; + // ... +} + +fn F(...) -> val T { + // ❌ Invalid: value return returned var ret: T = ...; // ... - return var; } ``` From d14726de3b8f524ff819b3f61ca65be3d29eaedc Mon Sep 17 00:00:00 2001 From: Josh L Date: Mon, 21 Jul 2025 20:42:55 +0000 Subject: [PATCH 54/57] Implement suggestions --- proposals/p5434.md | 61 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 12 deletions(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 5b7339d10b72e..7f293586fde8e 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -25,7 +25,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - [No optimization on erroneous behavior](#no-optimization-on-erroneous-behavior) - [`bound` parameters](#bound-parameters) - [How addresses interact with `ref`](#how-addresses-interact-with-ref) - - [Improved C++ interop and migration](#improved-c-interop-and-migration) + - [Improved interop and migration with C++ references](#improved-interop-and-migration-with-c-references) - [Part of the expression type system, not object types](#part-of-the-expression-type-system-not-object-types) - [Interaction with `returned var`](#interaction-with-returned-var) - [Use case: `Deref` interface](#use-case-deref-interface) @@ -90,6 +90,7 @@ Reference bindings have come up multiple times: - 
as a better alternative to `addr self: Self*`; - for use in [lambda captures](/docs/design/lambdas.md); +- to model different kinds of C++ references for interop and migration; - to support nested bindings within a destructured `var`, see [issue #5250](https://github.com/carbon-language/carbon-lang/issues/5250) and @@ -210,8 +211,11 @@ class C { Potentially abbreviating the syntax further (to allow `ref self` as a short form of `ref self: Self`) is left as future work. -The `ref` modifier is allowed on any `:` binding that's not inside a `var` -pattern. +The `ref` modifier is allowed on `:` bindings that are not: + +- inside a `var` pattern, +- a field of a `class` type, or +- a field of a struct type. ```carbon fn AddTwoToRef(ref x: i32) { @@ -239,15 +243,36 @@ let {var a: i32, ref b: i32} = ...; let {.a = var a: i32, .b = ref b: i32} = ...; ``` -The `ref` modifier is forbidden on the bindings in `class` or `struct` fields. +> Note: This takes us one step closer to `{` ambiguity. Previously we could +> distinguish between a struct literal/pattern and a non-empty block with only +> up to two tokens of lookahead (the struct cases start with `.` or `_` or +> identifier followed by `:`, and the block cases don't). Now we have things +> like: +> +> ```carbon +> fn F() -> X { var a: i32 = 0; } +> ``` +> +> ... where we're getting incrementally closer to ambiguity. We've got a few +> more steps before we get there, though, since we don't have an `X{...}` +> expression yet, and `var ...` is only allowed in struct patterns rather than +> struct expressions. So we're still fine, but this is cutting down our options +> for future syntactic expansion a little. + +The `ref` modifier is forbidden on the bindings in `class` or struct type +fields. ``` -var OuterSize:! i32 = 123; +var outer_size: i32 = 123; class Invalid { // ❌ Invalid. - let ref Size:! i32 = OuterSize; + let ref invalid_ref_field: i32 = outer_size; } + +// ❌ Invalid. 
+var invalid_struct_type_field: + {ref .invalid: i32} = {.invalid = outer_size}; ``` In a function argument list, arguments to non-`self` `ref` parameters are also @@ -315,6 +340,12 @@ fn G() -> bool { } ``` +Enforcing this restriction will be part of the memory safety story. Until then, +doing this is erroneous behavior. This +[means](#no-optimization-on-erroneous-behavior) that the compiler won't use +those LLVM attributes unless the compiler can itself prove that the restrictions +hold. + ### `ref` and `val` returns The return of a function can optionally be marked `ref` or `val`. These control @@ -439,8 +470,9 @@ fn Invalid2() -> ref i32 { } ``` -The address of a `bound ref` parameter is `captures(ret: address, provenance)` -instead of `captures(none)`. +The address of a `bound ref` parameter is the +[LLVM attribute `captures(ret: address, provenance)`](#background) instead of +[`captures(none)`](#background). ## Details @@ -494,8 +526,8 @@ fn Other() -> ``` Note that in the absence of `val`, `ref`, and `var` keywords, the implicit `var` -is placed in the outermost position, minimizing the number of returns. So -`fn F() -> (i32, i32)` means `fn F() -> var (i32, i32)` not +is placed in the outermost position, minimizing the number of primitive forms +returned. So `fn F() -> (i32, i32)` means `fn F() -> var (i32, i32)` not `fn F() -> (var i32, var i32)`. Generally the `var` is left off if not required, and so will be rare in return forms, to minimize confusion with `val`. @@ -624,7 +656,7 @@ Note that `ref` is disallowed inside `var` since that would be redundant. ### Mututation restriction on objects bound to a value -Mutation of objects with a non-copy-value representation in an active `let` +Mutation of objects with a non-copy-value representation in an active value binding ("borrowed objects") is erroneous behavior. 
- Our plan is to prevent mutation of borrowed objects in Carbon's strict safe @@ -837,7 +869,7 @@ These restrictions are experimental, and we should keep track of everything we end up needing to do to work around these restrictions so any reconsideration can be properly informed. -### Improved C++ interop and migration +### Improved interop and migration with C++ references We expect this to improve interop and migration by allowing significantly more interface similarity between Carbon and C++. Previously, many things in C++ that @@ -854,6 +886,11 @@ particularly on the `this` parameter that we are going to require migrate to `ref self`. We may have to add back in `addr` to allow a different pointer type for those cases. +> **Future work:** Addressing how we model the various kinds of C++ references +> that Carbon code may need to interact with is +> [something we are actively considering](https://docs.google.com/document/d/1l5TbNuwZEcwm96ejGPLn9GdoQO1fByUW0tFRLU9BqXE/edit?tab=t.0) +> and will be tackled in a future proposal. + ### Part of the expression type system, not object types Much like value/`val` and `var` bindings, `ref` binding and the new return forms From cfe2a65448720f7ec4e90355fd1cde6cd48724ad Mon Sep 17 00:00:00 2001 From: josh11b <15258583+josh11b@users.noreply.github.com> Date: Fri, 1 Aug 2025 10:01:59 -0700 Subject: [PATCH 55/57] Apply suggestions from code review Co-authored-by: Chandler Carruth --- proposals/p5434.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index 7f293586fde8e..e7e68db9394e4 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -266,7 +266,8 @@ fields. var outer_size: i32 = 123; class Invalid { - // ❌ Invalid. + // ❌ Invalid. We don't currently have runtime `let` bindings in classes, + // or `ref` on `var`s, but the intent is to not have `ref` bindings as fields. 
let ref invalid_ref_field: i32 = outer_size; } @@ -833,6 +834,8 @@ matching `[[clang::lifetimebound]]`, which has the goal of preventing some classes of bugs, not full memory safety. We will reconsider this with the memory safety design. +Clang's `lifetimebound` attribute also only applies to the immediately pointed to objects (by pointers or reference parameters, or pointers or reference subobjects of an aggregate parameter). We suggest a simpler, transitive model here that is more restrictive but should be compatible. That said, pinning down the exact and firm semantics of `bound`, especially in these complex cases, is deferred to the full memory safety design as well. + ### How addresses interact with `ref` The address of a `ref` binding is `noalias` and either `captures(none)` or From 35201809d6c589ed4424a3e7c4ce96d4e29e7c85 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 1 Aug 2025 17:13:59 +0000 Subject: [PATCH 56/57] Add TODO about updating initializing return docs --- docs/design/values.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/design/values.md b/docs/design/values.md index d810d90fb37e2..5704149b4130c 100644 --- a/docs/design/values.md +++ b/docs/design/values.md @@ -555,6 +555,12 @@ functions with a `()` return type for the purpose of expression categories. #### Deferred initialization from values and references +TODO: This section needs to be updated to reflect the addition of `-> val` +returns in [proposal #5434](/proposals/p5434.md). This section could be replaced +by a statement that initializing returns may be replaced by value returns when +that is safe and correct, moving much of this content into a description of how +value returns works. + Carbon also makes the evaluation of function calls and return statements tightly linked in order to enable more efficiency improvements. 
It allows the actual initialization performed by the `return` statement with its expression to be From 5aa00c73d98d1f9f02f7e5148a04da29e02fe210 Mon Sep 17 00:00:00 2001 From: Josh L Date: Fri, 1 Aug 2025 17:15:43 +0000 Subject: [PATCH 57/57] Fix formatting --- proposals/p5434.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/proposals/p5434.md b/proposals/p5434.md index e7e68db9394e4..d2d7cbec91fef 100644 --- a/proposals/p5434.md +++ b/proposals/p5434.md @@ -834,7 +834,12 @@ matching `[[clang::lifetimebound]]`, which has the goal of preventing some classes of bugs, not full memory safety. We will reconsider this with the memory safety design. -Clang's `lifetimebound` attribute also only applies to the immediately pointed to objects (by pointers or reference parameters, or pointers or reference subobjects of an aggregate parameter). We suggest a simpler, transitive model here that is more restrictive but should be compatible. That said, pinning down the exact and firm semantics of `bound`, especially in these complex cases, is deferred to the full memory safety design as well. +Clang's `lifetimebound` attribute also only applies to the immediately pointed +to objects (by pointers or reference parameters, or pointers or reference +subobjects of an aggregate parameter). We suggest a simpler, transitive model +here that is more restrictive but should be compatible. That said, pinning down +the exact and firm semantics of `bound`, especially in these complex cases, is +deferred to the full memory safety design as well. ### How addresses interact with `ref`