diff --git a/compiler_dualcompiler_test.go b/compiler_dualcompiler_test.go index 4bbd1074f..d18b23b5e 100644 --- a/compiler_dualcompiler_test.go +++ b/compiler_dualcompiler_test.go @@ -31,11 +31,6 @@ import ( func TestDualCompiler_ParseFilesMessageComments(t *testing.T) { t.Parallel() - skip := dualcompiler.SkipConfig{ - SkipNew: true, - SkipReason: "source code info not yet fully implemented in experimental compiler", - } - resolver := protocompile.WithStandardImports(&protocompile.SourceResolver{ ImportPaths: []string{"internal/testdata"}, }) @@ -46,7 +41,7 @@ func TestDualCompiler_ParseFilesMessageComments(t *testing.T) { dualcompiler.RunWithBothCompilersIf( t, - skip, + dualcompiler.SkipConfig{}, opts, func(t *testing.T, compiler dualcompiler.CompilerInterface) { ctx := t.Context() @@ -115,11 +110,6 @@ func TestDualCompiler_ParseFilesWithImportsNoImportPath(t *testing.T) { func TestDualCompiler_ParseCommentsBeforeDot(t *testing.T) { t.Parallel() - skip := dualcompiler.SkipConfig{ - SkipNew: true, - SkipReason: "source code info not yet fully implemented in experimental compiler", - } - accessor := protocompile.SourceAccessorFromMap(map[string]string{ "test.proto": ` syntax = "proto3"; @@ -137,7 +127,7 @@ message Foo { dualcompiler.RunWithBothCompilersIf( t, - skip, + dualcompiler.SkipConfig{}, opts, func(t *testing.T, compiler dualcompiler.CompilerInterface) { ctx := t.Context() diff --git a/experimental/ast/context.go b/experimental/ast/context.go index 297eaf6eb..54dc05137 100644 --- a/experimental/ast/context.go +++ b/experimental/ast/context.go @@ -96,6 +96,18 @@ func (f *File) Imports() iter.Seq[DeclImport] { }) } +// Options returns an iterator over this file's option definitions. 
+func (f *File) Options() iter.Seq[DefOption] { + return iterx.FilterMap(seq.Values(f.Decls()), func(d DeclAny) (DefOption, bool) { + if def := d.AsDef(); !def.IsZero() { + if def.Classify() == DefKindOption { + return def.AsOption(), true + } + } + return DefOption{}, false + }) +} + // Path returns the semantic import path of this file. func (f *File) Path() string { if f == nil { diff --git a/experimental/ast/decl_body.go b/experimental/ast/decl_body.go index dbaff7c0d..83a61df13 100644 --- a/experimental/ast/decl_body.go +++ b/experimental/ast/decl_body.go @@ -15,10 +15,13 @@ package ast import ( + "iter" + "github.com/bufbuild/protocompile/experimental/id" "github.com/bufbuild/protocompile/experimental/seq" "github.com/bufbuild/protocompile/experimental/source" "github.com/bufbuild/protocompile/experimental/token" + "github.com/bufbuild/protocompile/internal/ext/iterx" ) // DeclBody is the body of a [DeclBody], or the whole contents of a [File]. The @@ -110,3 +113,15 @@ func (d DeclBody) Decls() seq.Inserter[DeclAny] { }, ) } + +// Options returns an iterator over the option definitions in this body. +func (d DeclBody) Options() iter.Seq[DefOption] { + return iterx.FilterMap(seq.Values(d.Decls()), func(d DeclAny) (DefOption, bool) { + if def := d.AsDef(); !def.IsZero() { + if def.Classify() == DefKindOption { + return def.AsOption(), true + } + } + return DefOption{}, false + }) +} diff --git a/experimental/ast/expr.go b/experimental/ast/expr.go index de4614fd2..f1b76be3f 100644 --- a/experimental/ast/expr.go +++ b/experimental/ast/expr.go @@ -74,7 +74,7 @@ func (e ExprAny) AsLiteral() ExprLiteral { } // AsPath converts a ExprAny into a ExprPath, if that is the type -// it contains.q +// it contains. // // Otherwise, returns zero. 
func (e ExprAny) AsPath() ExprPath { diff --git a/experimental/ast/predeclared/predeclared.go b/experimental/ast/predeclared/predeclared.go index 1fc350e26..63e5cf0ea 100644 --- a/experimental/ast/predeclared/predeclared.go +++ b/experimental/ast/predeclared/predeclared.go @@ -26,7 +26,10 @@ import ( "fmt" "iter" + "google.golang.org/protobuf/types/descriptorpb" + "github.com/bufbuild/protocompile/experimental/token/keyword" + "github.com/bufbuild/protocompile/internal/ext/slicesx" ) // Name is one of the built-in Protobuf names. These represent particular @@ -63,6 +66,29 @@ const ( Float64 = Double ) +// predeclaredToFDPType maps the scalar predeclared [Name]s to their respective +// [descriptorpb.FieldDescriptorProto_Type]. +var predeclaredToFDPType = []descriptorpb.FieldDescriptorProto_Type{ + Int32: descriptorpb.FieldDescriptorProto_TYPE_INT32, + Int64: descriptorpb.FieldDescriptorProto_TYPE_INT64, + UInt32: descriptorpb.FieldDescriptorProto_TYPE_UINT32, + UInt64: descriptorpb.FieldDescriptorProto_TYPE_UINT64, + SInt32: descriptorpb.FieldDescriptorProto_TYPE_SINT32, + SInt64: descriptorpb.FieldDescriptorProto_TYPE_SINT64, + + Fixed32: descriptorpb.FieldDescriptorProto_TYPE_FIXED32, + Fixed64: descriptorpb.FieldDescriptorProto_TYPE_FIXED64, + SFixed32: descriptorpb.FieldDescriptorProto_TYPE_SFIXED32, + SFixed64: descriptorpb.FieldDescriptorProto_TYPE_SFIXED64, + + Float32: descriptorpb.FieldDescriptorProto_TYPE_FLOAT, + Float64: descriptorpb.FieldDescriptorProto_TYPE_DOUBLE, + + Bool: descriptorpb.FieldDescriptorProto_TYPE_BOOL, + String: descriptorpb.FieldDescriptorProto_TYPE_STRING, + Bytes: descriptorpb.FieldDescriptorProto_TYPE_BYTES, +} + // FromKeyword performs a vast from a [keyword.Keyword], but also validates // that it is in-range. If it isn't, returns [Unknown]. 
func FromKeyword(kw keyword.Keyword) Name { @@ -89,6 +115,13 @@ func (n Name) GoString() string { return keyword.Keyword(n).GoString() } +// FDPType returns the [descriptorpb.FieldDescriptorProto_Type] for the predeclared name, +// if it is a scalar type. Otherwise, it returns 0. +func (n Name) FDPType() descriptorpb.FieldDescriptorProto_Type { + kind, _ := slicesx.Get(predeclaredToFDPType, n) + return kind +} + // InRange returns whether this name value is within the range of declared // values. func (n Name) InRange() bool { diff --git a/experimental/ast/predeclared/predeclared_test.go b/experimental/ast/predeclared/predeclared_test.go index 66a5279b1..364bd6f8e 100644 --- a/experimental/ast/predeclared/predeclared_test.go +++ b/experimental/ast/predeclared/predeclared_test.go @@ -18,6 +18,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "google.golang.org/protobuf/types/descriptorpb" "github.com/bufbuild/protocompile/experimental/ast/predeclared" ) @@ -28,27 +29,28 @@ func TestPredicates(t *testing.T) { tests := []struct { v predeclared.Name scalar, key bool + fdpType descriptorpb.FieldDescriptorProto_Type }{ {v: predeclared.Unknown}, - {v: predeclared.Int32, scalar: true, key: true}, - {v: predeclared.Int64, scalar: true, key: true}, - {v: predeclared.UInt32, scalar: true, key: true}, - {v: predeclared.UInt64, scalar: true, key: true}, - {v: predeclared.SInt32, scalar: true, key: true}, - {v: predeclared.SInt64, scalar: true, key: true}, + {v: predeclared.Int32, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_INT32}, + {v: predeclared.Int64, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_INT64}, + {v: predeclared.UInt32, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_UINT32}, + {v: predeclared.UInt64, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_UINT64}, + {v: predeclared.SInt32, scalar: true, key: true, fdpType: 
descriptorpb.FieldDescriptorProto_TYPE_SINT32}, + {v: predeclared.SInt64, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_SINT64}, - {v: predeclared.Fixed32, scalar: true, key: true}, - {v: predeclared.Fixed64, scalar: true, key: true}, - {v: predeclared.SFixed32, scalar: true, key: true}, - {v: predeclared.SFixed64, scalar: true, key: true}, + {v: predeclared.Fixed32, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_FIXED32}, + {v: predeclared.Fixed64, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_FIXED64}, + {v: predeclared.SFixed32, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_SFIXED32}, + {v: predeclared.SFixed64, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_SFIXED64}, - {v: predeclared.Float, scalar: true}, - {v: predeclared.Double, scalar: true}, + {v: predeclared.Float, scalar: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_FLOAT}, + {v: predeclared.Double, scalar: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_DOUBLE}, - {v: predeclared.String, scalar: true, key: true}, - {v: predeclared.Bytes, scalar: true}, - {v: predeclared.Bool, scalar: true, key: true}, + {v: predeclared.String, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_STRING}, + {v: predeclared.Bytes, scalar: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_BYTES}, + {v: predeclared.Bool, scalar: true, key: true, fdpType: descriptorpb.FieldDescriptorProto_TYPE_BOOL}, {v: predeclared.Map}, {v: predeclared.Max}, @@ -61,5 +63,6 @@ func TestPredicates(t *testing.T) { for _, test := range tests { assert.Equal(t, test.scalar, test.v.IsScalar()) assert.Equal(t, test.key, test.v.IsMapKey()) + assert.Equal(t, test.fdpType, test.v.FDPType()) } } diff --git a/experimental/fdp/comments.go b/experimental/fdp/comments.go new file mode 100644 index 000000000..e9537ece6 --- /dev/null +++ b/experimental/fdp/comments.go @@ -0,0 +1,332 @@ 
+// Copyright 2020-2025 Buf Technologies, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package fdp
+
+import (
+	"fmt"
+	"slices"
+	"strings"
+	"unicode"
+
+	"github.com/bufbuild/protocompile/experimental/id"
+	"github.com/bufbuild/protocompile/experimental/token"
+	"github.com/bufbuild/protocompile/experimental/token/keyword"
+	"github.com/bufbuild/protocompile/internal/ext/slicesx"
+)
+
+// commentTracker is used to track and attribute comments in a token stream. All attributed
+// comments are stored in [commentTracker].attributed for easy look-up by [token.ID].
+type commentTracker struct {
+	cursor     *token.Cursor
+	attributed map[token.ID]*comments // [token.ID] and its attributed comments.
+	tracked    []paragraph
+
+	current []token.Token
+	prev    token.ID // The last non-skippable token.
+	// The first line of the current comment tokens is on the same line as the last non-skippable token.
+	firstCommentOnSameLine bool
+}
+
+// A paragraph is a group of comment and whitespace tokens that make up a single paragraph comment.
+type paragraph []token.Token
+
+// stringify returns the paragraph as a single string. It also trims off the leading "//"
+// for line comments, and enclosing "/* */" for block comments.
+func (p paragraph) stringify() string { + var str strings.Builder + for _, t := range p { + text := t.Text() + if t.Kind() != token.Comment { + fmt.Fprint(&str, text) + continue + } + switch { + case strings.HasPrefix(text, "//"): + // For line comments, the leading "//" needs to be trimmed off. + fmt.Fprint(&str, strings.TrimPrefix(text, "//")) + case strings.HasPrefix(text, "/*"): + // For block comments, we iterate through each line and trim the leading "/*", + // "*", and "*/". + for _, line := range strings.SplitAfter(text, "\n") { + switch { + case strings.HasPrefix(line, "/*"): + fmt.Fprint(&str, strings.TrimPrefix(line, "/*")) + case strings.HasSuffix(line, "*/"): + fmt.Fprint(&str, strings.TrimSuffix(line, "*/")) + case strings.HasPrefix(strings.TrimSpace(line), "*"): + // We check the line with all spaces trimmed because of leading whitespace. + fmt.Fprint(&str, strings.TrimPrefix(strings.TrimLeftFunc(line, unicode.IsSpace), "*")) + } + } + } + } + return str.String() +} + +// Comments are the leading, trailing, and detached comments associated with a token. +type comments struct { + leading paragraph + trailing paragraph + detached []paragraph +} + +// leadingComment returns the leading comment string. +func (c comments) leadingComment() string { + return c.leading.stringify() +} + +// trailingComment returns the trailing comment string. +func (c comments) trailingComment() string { + return c.trailing.stringify() +} + +// detachedComments returns a slice of detached comment strings. +func (c comments) detachedComments() []string { + detached := make([]string, len(c.detached)) + for i, paragraph := range c.detached { + detached[i] = paragraph.stringify() + } + return detached +} + +// attributeComments walks the given token stream and groups comment and space tokens +// into [paragraph]s and attributes them to non-skippable tokens as leading, trailing, and +// detached comments. 
+func (ct *commentTracker) attributeComments(cursor *token.Cursor) { + ct.cursor = cursor + t := cursor.NextSkippable() + for !t.IsZero() { + switch t.Kind() { + case token.Comment: + ct.handleCommentToken(t) + case token.Space: + ct.handleSpaceToken(t) + default: + ct.handleNonSkippableToken(t) + } + if !t.IsLeaf() { + ct.attributeComments(t.Children()) + _, end := t.StartEnd() + ct.handleNonSkippableToken(end) + ct.cursor = cursor + } + t = cursor.NextSkippable() + } +} + +// handleCommentToken looks at the current comment [token.Token] and determines whether to +// start tracking a new comment paragraph or track it as part of an existing paragraph. +// +// For line comments, if it is on the same line as the previous non-skippable token, it is +// always considered its own paragraph. +// +// A block comment cannot be made into a paragraph with other tokens, so the currently +// tracked paragraph is closed out, and the block comment is also closed out as its own +// paragraph. +// +// The first comment token since the last non-skippable token is always tracked. +func (ct *commentTracker) handleCommentToken(t token.Token) { + prev := id.Wrap(ct.cursor.Context(), ct.prev) + isLineComment := strings.HasPrefix(t.Text(), "//") + + if !isLineComment { + // Block comments are their own paragraph, close the current paragraph and track the + // current block comment as its own paragraph. + ct.closeParagraph() + ct.current = append(ct.current, t) + ct.closeParagraph() + return + } + + ct.current = append(ct.current, t) + // If this is not the first comment in the current paragraph, move on. + if len(ct.current) > 1 { + return + } + + if !prev.IsZero() && ct.cursor.NewLinesBetween(prev, t, 1) == 0 { + // This first comment is always in a paragraph by itself if there are no newlines + // between it and the previous non-skippable token. 
+ ct.closeParagraph() + ct.firstCommentOnSameLine = true + } +} + +// handleSpaceToken looks at the current space [token.Token] and determines whether this +// space token is part of the current comment paragraph or if the current paragraph needs +// to be closed. +// +// If there are no currently tracked paragraphs, then the space token is thrown away, +// paragraphs are not started with space tokens. +// +// If the current space token is a newline, and is preceded by another token that ends with +// a newline, then the current paragraph is closed, and the current newline token is dropped. +// Otherwise, the newline token is attached to the current paragraph. +// +// All other space tokens are thrown away. +func (ct *commentTracker) handleSpaceToken(t token.Token) { + if !strings.HasSuffix(t.Text(), "\n") || len(ct.current) == 0 { + return + } + + if strings.HasSuffix(ct.current[len(ct.current)-1].Text(), "\n") { + ct.closeParagraph() + } else { + ct.current = append(ct.current, t) + } +} + +// handleNonSkippableToken looks at the current non-skippable [token.Token], closes out the +// currently tracked paragraph, and determines attributions for the tracked comment paragraphs. +// +// Comments are either attributed as leading or detached leading comments on the current +// token or as trailing comments on the last seen non-skippable token. +func (ct *commentTracker) handleNonSkippableToken(t token.Token) { + ct.closeParagraph() + prev := id.Wrap(ct.cursor.Context(), ct.prev) + + // Set new non-skippable token + ct.prev = t.ID() + + if len(ct.tracked) == 0 { + return + } + + var donate bool // Donate the first tracked paragraph as a trailing comment to prev + switch { + case prev.IsZero(): + donate = false + case ct.firstCommentOnSameLine: + donate = true + // Check if there are more than 2 newlines between the previous non-skippable token + // and the first line of the first tracked paragraph. 
+	case ct.cursor.NewLinesBetween(prev, ct.tracked[0][0], 2) < 2:
+		// Fewer than two newlines separate the previous non-skippable token from the
+		// first tracked paragraph, so that paragraph may be a trailing comment.
+		// Check the remaining criteria for donation:
+		//
+		// 1. Is there more than one tracked paragraph? If so, donate the first one.
+		// 2. Is the current token one of keyword.LParen, LBracket, or LBrace? If yes,
+		//    donate the currently tracked paragraphs.
+		//    NOTE(review): the original comment described the closers ), ], and }
+		//    ("because a body is closed"), but the code compares against the L*
+		//    (opening) keywords — confirm which is intended.
+		// 3. Is there more than one newline between the current token and the end of
+		//    the first tracked paragraph? If yes, donate.
+		switch {
+		// NOTE(review): closeParagraph never appends a nil paragraph, so the
+		// `ct.tracked[1] != nil` check appears redundant — confirm and simplify.
+		case len(ct.tracked) > 1 && ct.tracked[1] != nil:
+			donate = true
+		case slicesx.Among(
+			t.Text(),
+			keyword.LParen.String(),
+			keyword.LBracket.String(),
+			keyword.LBrace.String(),
+		):
+			donate = true
+		case ct.cursor.NewLinesBetween(ct.tracked[0][len(ct.tracked[0])-1], t, 2) > 1:
+			donate = true
+		}
+	}
+
+	if donate {
+		ct.setTrailing(ct.tracked[0], prev)
+		ct.tracked = ct.tracked[1:]
+	}
+
+	if len(ct.tracked) > 0 {
+		// The leading comment must have precisely one new line between it and the current token.
+		if last := ct.tracked[len(ct.tracked)-1]; ct.cursor.NewLinesBetween(last[len(last)-1], t, 2) == 1 {
+			ct.setLeading(last, t)
+			ct.tracked = ct.tracked[:len(ct.tracked)-1]
+		}
+	}
+
+	// Check the remaining tracked comments to see if they are detached comments.
+	// Detached comments must be separated from other non-space tokens by at least 2
+	// newlines (unless they are at the top of the file), e.g. a file with contents:
+	//
+	//  // This is a detached comment at the top of the file.
+	//
+	//  edition = "2023";
+	//
+	//  message Foo {}
+	//  // This is neither a detached nor trailing comment, since it is not separated from
+	//  // the closing brace above by an empty line.
+	//
+	//  // This IS a detached comment for Bar.
+	//
+	//  // A leading comment for Bar.
+ // message Bar {} + // + for i, remaining := range ct.tracked { + prev := remaining[0].Prev() + for prev.Kind() == token.Space { + prev = prev.Prev() + } + next := remaining[len(remaining)-1].Next() + for next.Kind() == token.Space { + next = next.Next() + } + if !prev.IsZero() && ct.cursor.NewLinesBetween(prev, remaining[0], 2) < 2 { + continue + } + if !next.IsZero() && ct.cursor.NewLinesBetween(remaining[len(remaining)-1], next, 2) == 2 { + ct.setDetached(ct.tracked[i:], t) + break + } + } + // Reset tracked comment information + ct.firstCommentOnSameLine = false + ct.tracked = nil +} + +// closeParagraph takes the currently tracked paragraph, closes it, and tracks it. +func (ct *commentTracker) closeParagraph() { + // If the current paragraph only contains whitespace tokens, then throw it away. + if slices.ContainsFunc(ct.current, func(t token.Token) bool { + return t.Kind() == token.Comment + }) { + ct.tracked = append(ct.tracked, ct.current) + } + ct.current = nil +} + +// setLeading sets the given paragraph as the leading comment on the given token. +func (ct *commentTracker) setLeading(leading paragraph, t token.Token) { + ct.mutateComment(t, func(c *comments) { + c.leading = leading + }) +} + +// setTrailing sets the given paragraph as the trailing comment on the given token. +func (ct *commentTracker) setTrailing(trailing paragraph, t token.Token) { + ct.mutateComment(t, func(c *comments) { + c.trailing = trailing + }) +} + +// setDetached sets the given slice of paragraphs as the detached comments on the given token. +func (ct *commentTracker) setDetached(detached []paragraph, t token.Token) { + ct.mutateComment(t, func(c *comments) { + c.detached = detached + }) +} + +// mutateComment mutates the attributed comments on the given token. 
+func (ct *commentTracker) mutateComment(t token.Token, mutate func(*comments)) { + if ct.attributed == nil { + ct.attributed = make(map[token.ID]*comments) + } + + if ct.attributed[t.ID()] == nil { + ct.attributed[t.ID()] = &comments{} + } + mutate(ct.attributed[t.ID()]) +} diff --git a/experimental/fdp/comments_test.go b/experimental/fdp/comments_test.go new file mode 100644 index 000000000..47f556bbd --- /dev/null +++ b/experimental/fdp/comments_test.go @@ -0,0 +1,56 @@ +// Copyright 2020-2025 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package fdp + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/bufbuild/protocompile/experimental/source" + "github.com/bufbuild/protocompile/experimental/token" +) + +func TestLineComments(t *testing.T) { + t.Parallel() + + s := &token.Stream{ + File: source.NewFile("test", + `// Line 1 +// Line 2`), + } + line1 := s.Push(9, token.Comment) + newline := s.Push(1, token.Space) + line2 := s.Push(9, token.Comment) + + tokens := paragraph([]token.Token{line1, newline, line2}) + assert.Equal(t, " Line 1\n Line 2", tokens.stringify()) +} + +func TestBlockComments(t *testing.T) { + t.Parallel() + + s := &token.Stream{ + File: source.NewFile("test", + `/* +* Line 1 +* Line 2 +*/`), + } + block := s.Push(21, token.Comment) + + tokens := paragraph([]token.Token{block}) + assert.Equal(t, "\n Line 1\n Line 2\n", tokens.stringify()) +} diff --git a/experimental/fdp/descriptor.go b/experimental/fdp/descriptor.go new file mode 100644 index 000000000..838eecf4b --- /dev/null +++ b/experimental/fdp/descriptor.go @@ -0,0 +1,76 @@ +// Copyright 2020-2025 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package fdp provides functionality for lowering the IR to a FileDescriptorSet. 
+package fdp
+
+import (
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/descriptorpb"
+
+	"github.com/bufbuild/protocompile/experimental/ir"
+)
+
+// DescriptorSetBytes generates a FileDescriptorSet for the given files, and returns the
+// result as an encoded byte slice.
+//
+// The resulting FileDescriptorSet is always fully linked: it contains all dependencies except
+// the WKTs, and all names are fully-qualified.
+func DescriptorSetBytes(files []*ir.File, options ...DescriptorOption) ([]byte, error) {
+	var g generator
+	for _, opt := range options {
+		if opt != nil {
+			opt(&g)
+		}
+	}
+
+	fds := new(descriptorpb.FileDescriptorSet)
+	g.files(files, fds)
+	return proto.Marshal(fds)
+}
+
+// DescriptorProtoBytes generates a single FileDescriptorProto for file, and returns the
+// result as an encoded byte slice.
+//
+// The resulting FileDescriptorProto is fully linked: all names are fully-qualified.
+func DescriptorProtoBytes(file *ir.File, options ...DescriptorOption) ([]byte, error) {
+	var g generator
+	for _, opt := range options {
+		if opt != nil {
+			opt(&g)
+		}
+	}
+
+	fdp := new(descriptorpb.FileDescriptorProto)
+	g.file(file, fdp)
+	return proto.Marshal(fdp)
+}
+
+// DescriptorOption is an option to pass to [DescriptorSetBytes] or [DescriptorProtoBytes].
+type DescriptorOption func(*generator)
+
+// IncludeSourceCodeInfo sets whether or not to include google.protobuf.SourceCodeInfo in
+// the output.
+func IncludeSourceCodeInfo(flag bool) DescriptorOption {
+	return func(g *generator) {
+		g.includeDebugInfo = flag
+	}
+}
+
+// ExcludeFiles excludes the given files from the output of [DescriptorSetBytes].
+func ExcludeFiles(exclude func(*ir.File) bool) DescriptorOption { + return func(g *generator) { + g.exclude = exclude + } +} diff --git a/experimental/fdp/generator.go b/experimental/fdp/generator.go new file mode 100644 index 000000000..69ea1ea8f --- /dev/null +++ b/experimental/fdp/generator.go @@ -0,0 +1,930 @@ +// Copyright 2020-2025 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fdp + +import ( + "math" + "slices" + "strconv" + + descriptorv1 "buf.build/gen/go/bufbuild/protodescriptor/protocolbuffers/go/buf/descriptor/v1" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/descriptorpb" + + "github.com/bufbuild/protocompile/experimental/ast" + "github.com/bufbuild/protocompile/experimental/ast/syntax" + "github.com/bufbuild/protocompile/experimental/ir" + "github.com/bufbuild/protocompile/experimental/ir/presence" + "github.com/bufbuild/protocompile/experimental/seq" + "github.com/bufbuild/protocompile/experimental/source" + "github.com/bufbuild/protocompile/experimental/token" + "github.com/bufbuild/protocompile/experimental/token/keyword" + "github.com/bufbuild/protocompile/internal" + "github.com/bufbuild/protocompile/internal/ext/cmpx" + "github.com/bufbuild/protocompile/internal/ext/iterx" +) + +type generator struct { + currentFile *ir.File + includeDebugInfo bool + exclude func(*ir.File) bool + + path *path + sourceCodeInfo *descriptorpb.SourceCodeInfo + sourceCodeInfoExtn 
*descriptorv1.SourceCodeInfoExtension + + commentTracker *commentTracker +} + +func (g *generator) files(files []*ir.File, fds *descriptorpb.FileDescriptorSet) { + // Build up all of the imported files. We can't just pull out the transitive + // imports for each file because we want the result to be sorted + // topologically. + for file := range ir.TopoSort(files) { + if g.exclude != nil && g.exclude(file) { + continue + } + + fdp := new(descriptorpb.FileDescriptorProto) + fds.File = append(fds.File, fdp) + + g.file(file, fdp) + } +} + +func (g *generator) file(file *ir.File, fdp *descriptorpb.FileDescriptorProto) { + g.currentFile = file + fdp.Name = addr(file.Path()) + g.path = new(path) + + if g.includeDebugInfo { + g.sourceCodeInfo = new(descriptorpb.SourceCodeInfo) + fdp.SourceCodeInfo = g.sourceCodeInfo + + ct := new(commentTracker) + g.commentTracker = ct + ct.attributeComments(g.currentFile.AST().Stream().Cursor()) + + g.sourceCodeInfoExtn = new(descriptorv1.SourceCodeInfoExtension) + proto.SetExtension(g.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension, g.sourceCodeInfoExtn) + } + + fdp.Package = addr(string(file.Package())) + g.addSourceLocationWithSourcePathElements( + file.AST().Package().Span(), + []int32{internal.FilePackageTag}, + file.AST().Package().KeywordToken().ID(), + file.AST().Package().Semicolon().ID(), + ) + + if file.Syntax().IsEdition() { + fdp.Syntax = addr("editions") + fdp.Edition = descriptorpb.Edition(file.Syntax()).Enum() + } else { + fdp.Syntax = addr(file.Syntax().String()) + } + g.addSourceLocationWithSourcePathElements( + file.AST().Syntax().Span(), + // According to descriptor.proto and protoc behavior, the path is always set to [12] + // for both syntax and editions. 
+		[]int32{internal.FileSyntaxTag},
+		file.AST().Syntax().KeywordToken().ID(),
+		file.AST().Syntax().Semicolon().ID(),
+	)
+
+	if g.sourceCodeInfoExtn != nil {
+		g.sourceCodeInfoExtn.IsSyntaxUnspecified = file.AST().Syntax().IsZero()
+	}
+
+	// Canonicalize import order so that it does not change whenever we refactor
+	// internal structures.
+	imports := seq.ToSlice(file.Imports())
+	slices.SortFunc(imports, cmpx.Key(func(imp ir.Import) int {
+		return imp.Decl.KeywordToken().Span().Start
+	}))
+
+	var publicDepIndex, weakDepIndex, optionDepIndex int32
+	for i, imp := range imports {
+		if !imp.Option {
+			fdp.Dependency = append(fdp.Dependency, imp.Path())
+			g.addSourceLocationWithSourcePathElements(
+				imp.Decl.Span(),
+				[]int32{internal.FileDependencyTag, int32(i)},
+				imp.Decl.KeywordToken().ID(),
+				imp.Decl.Semicolon().ID(),
+			)
+			if imp.Public {
+				fdp.PublicDependency = append(fdp.PublicDependency, int32(i))
+				_, public := iterx.Find(seq.Values(imp.Decl.ModifierTokens()), func(t token.Token) bool {
+					return t.Keyword() == keyword.Public
+				})
+				g.addSourceLocationWithSourcePathElements(
+					public.Span(),
+					[]int32{internal.FilePublicDependencyTag, publicDepIndex},
+				)
+				publicDepIndex++
+			}
+			if imp.Weak {
+				fdp.WeakDependency = append(fdp.WeakDependency, int32(i))
+				_, weak := iterx.Find(seq.Values(imp.Decl.ModifierTokens()), func(t token.Token) bool {
+					return t.Keyword() == keyword.Weak
+				})
+				g.addSourceLocationWithSourcePathElements(
+					weak.Span(),
+					[]int32{internal.FileWeakDependencyTag, weakDepIndex},
+				)
+				// Advance the index so each subsequent weak import gets a distinct
+				// source-path index (mirrors publicDepIndex above; previously missing).
+				weakDepIndex++
+			}
+		} else {
+			// imp.Option is necessarily true on this branch; the redundant
+			// `else if imp.Option` re-check was dropped.
+			fdp.OptionDependency = append(fdp.OptionDependency, imp.Path())
+			g.addSourceLocationWithSourcePathElements(
+				imp.Decl.Span(),
+				[]int32{internal.FileOptionDependencyTag, optionDepIndex},
+				imp.Decl.KeywordToken().ID(),
+				imp.Decl.Semicolon().ID(),
+			)
+			// Advance the index for the same reason as above (previously missing).
+			optionDepIndex++
+		}
+
+		if g.sourceCodeInfoExtn != nil && !imp.Used {
+			g.sourceCodeInfoExtn.UnusedDependency = append(g.sourceCodeInfoExtn.UnusedDependency, int32(i))
+		}
+	}
+
+	var 
msgIndex, enumIndex int32 + for ty := range seq.Values(file.Types()) { + if ty.IsEnum() { + edp := new(descriptorpb.EnumDescriptorProto) + fdp.EnumType = append(fdp.EnumType, edp) + g.enum(ty, edp, internal.FileEnumsTag, enumIndex) + enumIndex++ + continue + } + + mdp := new(descriptorpb.DescriptorProto) + fdp.MessageType = append(fdp.MessageType, mdp) + g.message(ty, mdp, internal.FileMessagesTag, msgIndex) + msgIndex++ + } + + for i, service := range seq.All(file.Services()) { + sdp := new(descriptorpb.ServiceDescriptorProto) + fdp.Service = append(fdp.Service, sdp) + g.service(service, sdp, internal.FileServicesTag, int32(i)) + } + + var extnIndex int32 + for extend := range seq.Values(file.Extends()) { + g.addSourceLocationWithSourcePathElements( + extend.AST().Span(), + []int32{internal.FileExtensionsTag}, + extend.AST().KeywordToken().ID(), + extend.AST().Body().Braces().ID(), + ) + + for extn := range seq.Values(extend.Extensions()) { + fd := new(descriptorpb.FieldDescriptorProto) + fdp.Extension = append(fdp.Extension, fd) + g.field(extn, fd, internal.FileExtensionsTag, extnIndex) + extnIndex++ + } + } + + if options := file.Options(); !iterx.Empty(options.Fields()) { + for option := range file.AST().Options() { + g.addSourceLocationWithSourcePathElements(option.Span(), []int32{internal.FileOptionsTag}) + } + + fdp.Options = new(descriptorpb.FileOptions) + g.options(options, fdp.Options, internal.FileOptionsTag) + } + + if g.sourceCodeInfoExtn != nil && iterx.Empty2(g.sourceCodeInfoExtn.ProtoReflect().Range) { + proto.ClearExtension(g.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension) + } + + if g.sourceCodeInfo != nil { + slices.SortFunc(g.sourceCodeInfo.Location, func(a, b *descriptorpb.SourceCodeInfo_Location) int { + return slices.Compare(a.Span, b.Span) + }) + g.sourceCodeInfo.Location = append( + []*descriptorpb.SourceCodeInfo_Location{{Span: locationSpan(file.AST().Span())}}, + g.sourceCodeInfo.Location..., + ) + } +} + +func (g *generator) 
message(ty ir.Type, mdp *descriptorpb.DescriptorProto, sourcePath ...int32) { + reset := g.path.with(sourcePath...) + defer reset() + + messageAST := ty.AST().AsMessage() + g.addSourceLocation(messageAST.Span(), messageAST.Keyword.ID(), messageAST.Body.Braces().ID()) + + mdp.Name = addr(ty.Name()) + g.addSourceLocationWithSourcePathElements(messageAST.Name.Span(), []int32{internal.MessageNameTag}) + + for i, field := range seq.All(ty.Members()) { + fd := new(descriptorpb.FieldDescriptorProto) + mdp.Field = append(mdp.Field, fd) + g.field(field, fd, internal.MessageFieldsTag, int32(i)) + } + + var extnIndex int32 + for extend := range seq.Values(ty.Extends()) { + g.addSourceLocationWithSourcePathElements( + extend.AST().Span(), + []int32{internal.MessageExtensionsTag}, + extend.AST().KeywordToken().ID(), + extend.AST().Body().Braces().ID(), + ) + + for extn := range seq.Values(extend.Extensions()) { + fd := new(descriptorpb.FieldDescriptorProto) + mdp.Extension = append(mdp.Extension, fd) + g.field(extn, fd, internal.MessageExtensionsTag, extnIndex) + extnIndex++ + } + } + + var enumIndex, nestedMsgIndex int32 + for ty := range seq.Values(ty.Nested()) { + if ty.IsEnum() { + edp := new(descriptorpb.EnumDescriptorProto) + mdp.EnumType = append(mdp.EnumType, edp) + g.enum(ty, edp, internal.MessageEnumsTag, enumIndex) + enumIndex++ + continue + } + + nested := new(descriptorpb.DescriptorProto) + mdp.NestedType = append(mdp.NestedType, nested) + g.message(ty, nested, internal.MessageNestedMessagesTag, nestedMsgIndex) + nestedMsgIndex++ + } + + for i, extensions := range seq.All(ty.ExtensionRanges()) { + er := new(descriptorpb.DescriptorProto_ExtensionRange) + mdp.ExtensionRange = append(mdp.ExtensionRange, er) + + start, end := extensions.Range() + er.Start = addr(start) + er.End = addr(end + 1) // Exclusive. 
+ + g.addSourceLocationWithSourcePathElements( + extensions.DeclAST().Span(), + []int32{internal.MessageExtensionRangesTag}, + extensions.DeclAST().KeywordToken().ID(), + extensions.DeclAST().Semicolon().ID(), + ) + + g.rangeSourceCodeInfo( + extensions.AST(), + internal.MessageExtensionRangesTag, + internal.ExtensionRangeStartTag, + internal.ExtensionRangeEndTag, + int32(i), + ) + + if options := extensions.Options(); !iterx.Empty(options.Fields()) { + g.addSourceLocationWithSourcePathElements( + extensions.DeclAST().Options().Span(), + []int32{internal.ExtensionRangeOptionsTag}, + ) + + er.Options = new(descriptorpb.ExtensionRangeOptions) + g.options(options, er.Options, internal.ExtensionRangeOptionsTag) + } + } + + var topLevelSourceLocation bool + for i, reserved := range seq.All(ty.ReservedRanges()) { + if !topLevelSourceLocation { + g.addSourceLocationWithSourcePathElements( + reserved.DeclAST().Span(), + []int32{internal.MessageReservedRangesTag}, + reserved.DeclAST().KeywordToken().ID(), + reserved.DeclAST().Semicolon().ID(), + ) + topLevelSourceLocation = true + } + + rr := new(descriptorpb.DescriptorProto_ReservedRange) + mdp.ReservedRange = append(mdp.ReservedRange, rr) + + start, end := reserved.Range() + rr.Start = addr(start) + rr.End = addr(end + 1) // Exclusive. 
+ + g.rangeSourceCodeInfo( + reserved.AST(), + internal.MessageReservedRangesTag, + internal.ReservedRangeStartTag, + internal.ReservedRangeEndTag, + int32(i), + ) + } + + topLevelSourceLocation = false + for i, name := range seq.All(ty.ReservedNames()) { + if !topLevelSourceLocation { + g.addSourceLocationWithSourcePathElements( + name.DeclAST().Span(), + []int32{internal.MessageReservedNamesTag}, + name.DeclAST().KeywordToken().ID(), + name.DeclAST().Semicolon().ID(), + ) + topLevelSourceLocation = true + } + + mdp.ReservedName = append(mdp.ReservedName, name.Name()) + g.addSourceLocationWithSourcePathElements( + name.AST().Span(), + []int32{internal.MessageReservedNamesTag, int32(i)}, + ) + } + + for i, oneof := range seq.All(ty.Oneofs()) { + odp := new(descriptorpb.OneofDescriptorProto) + mdp.OneofDecl = append(mdp.OneofDecl, odp) + g.oneof(oneof, odp, internal.MessageOneofsTag, int32(i)) + } + + if g.currentFile.Syntax() == syntax.Proto3 { + // Only now that we have added all of the normal oneofs do we add the + // synthetic oneofs. 
+ for i, field := range seq.All(ty.Members()) { + if field.SyntheticOneofName() == "" { + continue + } + + fdp := mdp.Field[i] + fdp.Proto3Optional = addr(true) + fdp.OneofIndex = addr(int32(len(mdp.OneofDecl))) + mdp.OneofDecl = append(mdp.OneofDecl, &descriptorpb.OneofDescriptorProto{ + Name: addr(field.SyntheticOneofName()), + }) + } + } + + if options := ty.Options(); !iterx.Empty(options.Fields()) { + for option := range messageAST.Body.Options() { + g.addSourceLocationWithSourcePathElements(option.Span(), []int32{internal.MessageOptionsTag}) + } + + mdp.Options = new(descriptorpb.MessageOptions) + g.options(options, mdp.Options, internal.MessageOptionsTag) + } + + switch exported, explicit := ty.IsExported(); { + case !explicit: + break + case exported: + mdp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_EXPORT.Enum() + case !exported: + mdp.Visibility = + descriptorpb.SymbolVisibility_VISIBILITY_LOCAL.Enum() + } +} + +func (g *generator) field(f ir.Member, fdp *descriptorpb.FieldDescriptorProto, sourcePath ...int32) { + reset := g.path.with(sourcePath...) + defer reset() + + fieldAST := f.AST().AsField() + g.addSourceLocation(fieldAST.Span(), token.ID(fieldAST.Type.ID()), fieldAST.Semicolon.ID()) + + fdp.Name = addr(f.Name()) + g.addSourceLocationWithSourcePathElements(fieldAST.Name.Span(), []int32{internal.FieldNameTag}) + + fdp.Number = addr(f.Number()) + g.addSourceLocationWithSourcePathElements(fieldAST.Tag.Span(), []int32{internal.FieldNumberTag}) + + switch f.Presence() { + case presence.Explicit, presence.Implicit, presence.Shared: + fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum() + case presence.Repeated: + fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_REPEATED.Enum() + case presence.Required: + fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum() + } + + // Note: for specifically protobuf fields, we expect a single prefix. 
The protocompile + // AST allows for arbitrary nesting of prefixes, so the API returns an iterator, but + // [descriptorpb.FieldDescriptorProto] expects a single label. + for prefix := range fieldAST.Type.Prefixes() { + g.addSourceLocationWithSourcePathElements( + prefix.PrefixToken().Span(), + []int32{internal.FieldLabelTag}, + ) + } + + fieldTypeSourcePathElement := internal.FieldTypeNameTag + if ty := f.Element(); !ty.IsZero() { + if kind := ty.Predeclared().FDPType(); kind != 0 { + fdp.Type = kind.Enum() + fieldTypeSourcePathElement = internal.FieldTypeTag + } else { + fdp.TypeName = addr(string(ty.FullName().ToAbsolute())) + switch { + case ty.IsEnum(): + fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum() + case f.IsGroup(): + fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_GROUP.Enum() + default: + fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum() + } + } + } + g.addSourceLocationWithSourcePathElements( + fieldAST.Type.RemovePrefixes().Span(), + []int32{int32(fieldTypeSourcePathElement)}, + ) + + if f.IsExtension() && f.Container().FullName() != "" { + fdp.Extendee = addr(string(f.Container().FullName().ToAbsolute())) + g.addSourceLocationWithSourcePathElements( + f.Extend().AST().Name().Span(), + []int32{internal.FieldExtendeeTag}, + ) + } + + if oneof := f.Oneof(); !oneof.IsZero() { + fdp.OneofIndex = addr(int32(oneof.Index())) + } + + if options := f.Options(); !iterx.Empty(options.Fields()) { + g.addSourceLocationWithSourcePathElements( + fieldAST.Options.Span(), + []int32{internal.FieldOptionsTag}, + ) + + fdp.Options = new(descriptorpb.FieldOptions) + g.options(options, fdp.Options, internal.FieldOptionsTag) + } + + fdp.JsonName = addr(f.JSONName()) + + d := f.PseudoOptions().Default + if !d.IsZero() { + if v, ok := d.AsBool(); ok { + fdp.DefaultValue = addr(strconv.FormatBool(v)) + } else if v, ok := d.AsInt(); ok { + fdp.DefaultValue = addr(strconv.FormatInt(v, 10)) + } else if v, ok := d.AsUInt(); ok { + fdp.DefaultValue = 
addr(strconv.FormatUint(v, 10)) + } else if v, ok := d.AsFloat(); ok { + switch { + case math.IsInf(v, 1): + fdp.DefaultValue = addr("inf") + case math.IsInf(v, -1): + fdp.DefaultValue = addr("-inf") + case math.IsNaN(v): + fdp.DefaultValue = addr("nan") // Goodbye NaN payload. :( + default: + fdp.DefaultValue = addr(strconv.FormatFloat(v, 'g', -1, 64)) + } + } else if v, ok := d.AsString(); ok { + fdp.DefaultValue = addr(v) + } + } +} + +func (g *generator) oneof(o ir.Oneof, odp *descriptorpb.OneofDescriptorProto, sourcePath ...int32) { + topLevelReset := g.path.with(sourcePath...) + defer topLevelReset() + + oneofAST := o.AST().AsOneof() + g.addSourceLocation(oneofAST.Span(), oneofAST.Keyword.ID(), oneofAST.Body.Braces().ID()) + + odp.Name = addr(o.Name()) + reset := g.path.with(internal.OneofNameTag) + g.addSourceLocation(oneofAST.Name.Span()) + reset() + + if options := o.Options(); !iterx.Empty(options.Fields()) { + for option := range oneofAST.Body.Options() { + reset := g.path.with(internal.OneofOptionsTag) + g.addSourceLocation(option.Span()) + reset() + } + + odp.Options = new(descriptorpb.OneofOptions) + g.options(options, odp.Options, internal.OneofOptionsTag) + } +} + +func (g *generator) enum(ty ir.Type, edp *descriptorpb.EnumDescriptorProto, sourcePath ...int32) { + topLevelReset := g.path.with(sourcePath...) 
+ defer topLevelReset() + + enumAST := ty.AST().AsEnum() + g.addSourceLocation(enumAST.Span(), enumAST.Keyword.ID(), enumAST.Body.Braces().ID()) + + edp.Name = addr(ty.Name()) + reset := g.path.with(internal.EnumNameTag) + g.addSourceLocation(enumAST.Name.Span()) + reset() + + for i, enumValue := range seq.All(ty.Members()) { + evd := new(descriptorpb.EnumValueDescriptorProto) + edp.Value = append(edp.Value, evd) + g.enumValue(enumValue, evd, internal.EnumValuesTag, int32(i)) + } + + var topLevelSourceLocation bool + for i, reserved := range seq.All(ty.ReservedRanges()) { + if !topLevelSourceLocation { + reset := g.path.with(internal.EnumReservedRangesTag) + g.addSourceLocation( + reserved.DeclAST().Span(), + reserved.DeclAST().KeywordToken().ID(), + reserved.DeclAST().Semicolon().ID(), + ) + reset() + topLevelSourceLocation = true + } + + rr := new(descriptorpb.EnumDescriptorProto_EnumReservedRange) + edp.ReservedRange = append(edp.ReservedRange, rr) + + start, end := reserved.Range() + rr.Start = addr(start) + rr.End = addr(end) // Inclusive, not exclusive like the one for messages! 
+ + g.rangeSourceCodeInfo( + reserved.AST(), + internal.EnumReservedRangesTag, + internal.ReservedRangeStartTag, + internal.ReservedRangeEndTag, + int32(i), + ) + } + + topLevelSourceLocation = false + for i, name := range seq.All(ty.ReservedNames()) { + if !topLevelSourceLocation { + reset := g.path.with(internal.EnumReservedNamesTag) + g.addSourceLocation( + name.DeclAST().Span(), + name.DeclAST().KeywordToken().ID(), + name.DeclAST().Semicolon().ID(), + ) + reset() + topLevelSourceLocation = true + } + + edp.ReservedName = append(edp.ReservedName, name.Name()) + reset := g.path.with(internal.EnumReservedNamesTag, int32(i)) + g.addSourceLocation(name.AST().Span()) + reset() + } + + if options := ty.Options(); !iterx.Empty(options.Fields()) { + for option := range enumAST.Body.Options() { + reset := g.path.with(internal.EnumOptionsTag) + g.addSourceLocation(option.Span()) + reset() + } + + edp.Options = new(descriptorpb.EnumOptions) + g.options(options, edp.Options, internal.EnumOptionsTag) + } + + switch exported, explicit := ty.IsExported(); { + case !explicit: + break + case exported: + edp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_EXPORT.Enum() + case !exported: + edp.Visibility = + descriptorpb.SymbolVisibility_VISIBILITY_LOCAL.Enum() + } +} + +func (g *generator) enumValue(f ir.Member, evdp *descriptorpb.EnumValueDescriptorProto, sourcePath ...int32) { + topLevelReset := g.path.with(sourcePath...) 
+ defer topLevelReset() + + enumValueAST := f.AST().AsEnumValue() + g.addSourceLocation(enumValueAST.Span(), enumValueAST.Name.ID(), enumValueAST.Semicolon.ID()) + + evdp.Name = addr(f.Name()) + reset := g.path.with(internal.EnumValNameTag) + g.addSourceLocation(enumValueAST.Name.Span()) + reset() + + evdp.Number = addr(f.Number()) + reset = g.path.with(internal.EnumValNumberTag) + g.addSourceLocation(enumValueAST.Tag.Span()) + reset() + + if options := f.Options(); !iterx.Empty(options.Fields()) { + reset := g.path.with(internal.EnumValOptionsTag) + g.addSourceLocation(enumValueAST.Options.Span()) + reset() + + evdp.Options = new(descriptorpb.EnumValueOptions) + g.options(options, evdp.Options, internal.EnumValOptionsTag) + } +} + +func (g *generator) service(s ir.Service, sdp *descriptorpb.ServiceDescriptorProto, sourcePath ...int32) { + topLevelReset := g.path.with(sourcePath...) + defer topLevelReset() + + serviceAST := s.AST().AsService() + g.addSourceLocation(serviceAST.Span(), serviceAST.Keyword.ID(), serviceAST.Body.Braces().ID()) + + sdp.Name = addr(s.Name()) + reset := g.path.with(internal.ServiceNameTag) + g.addSourceLocation(serviceAST.Name.Span()) + reset() + + for i, method := range seq.All(s.Methods()) { + mdp := new(descriptorpb.MethodDescriptorProto) + sdp.Method = append(sdp.Method, mdp) + g.method(method, mdp, internal.ServiceMethodsTag, int32(i)) + } + + if options := s.Options(); !iterx.Empty(options.Fields()) { + sdp.Options = new(descriptorpb.ServiceOptions) + for option := range serviceAST.Body.Options() { + reset := g.path.with(internal.ServiceOptionsTag) + g.addSourceLocation(option.Span()) + reset() + } + g.options(options, sdp.Options, internal.ServiceOptionsTag) + } +} + +func (g *generator) method(m ir.Method, mdp *descriptorpb.MethodDescriptorProto, sourcePath ...int32) { + topLevelReset := g.path.with(sourcePath...) + defer topLevelReset() + + methodAST := m.AST().AsMethod() + + // Comment attribution for tokens is unique. 
The behavior in protoc for method leading + // comments is as follows for methods without a body: + // + // service FooService { + // // I'm the leading comment for GetFoo + // rpc GetFoo (GetFooRequest) returns (GetFooResponse); // I'm the trailing comment for GetFoo + // } + // + // And for methods with a body: + // + // service FooService { + // // I'm still the leading comment for GetFoo + // rpc GetFoo (GetFooRequest) returns (GetFooResponse) { // I'm the trailing comment for GetFoo + // }; // I am NOT the trailing comment for GetFoo, and am instead dropped. + // } + // + closingToken := m.AST().Semicolon().ID() + if !methodAST.Body.Braces().IsZero() { + closingToken = methodAST.Body.Braces().ID() + } + + g.addSourceLocation(methodAST.Span(), methodAST.Keyword.ID(), closingToken) + + mdp.Name = addr(m.Name()) + reset := g.path.with(internal.MethodNameTag) + g.addSourceLocation(methodAST.Name.Span()) + reset() + + in, inStream := m.Input() + mdp.InputType = addr(string(in.FullName())) + mdp.ClientStreaming = addr(inStream) + + // Methods only have a single input, see [descriptorpb.MethodDescriptorProto]. + inputAST := methodAST.Signature.Inputs().At(0) + if prefixed := inputAST.AsPrefixed(); !prefixed.IsZero() { + reset := g.path.with(internal.MethodInputStreamTag) + g.addSourceLocation(prefixed.PrefixToken().Span()) + reset() + } + reset = g.path.with(internal.MethodInputTag) + g.addSourceLocation(inputAST.RemovePrefixes().Span()) + reset() + + out, outStream := m.Output() + mdp.OutputType = addr(string(out.FullName())) + mdp.ServerStreaming = addr(outStream) + + // Methods only have a single output, see [descriptorpb.MethodDescriptorProto]. 
+ outputAST := methodAST.Signature.Outputs().At(0) + if prefixed := outputAST.AsPrefixed(); !prefixed.IsZero() { + reset := g.path.with(internal.MethodOutputStreamTag) + g.addSourceLocation(prefixed.PrefixToken().Span()) + reset() + } + reset = g.path.with(internal.MethodOutputTag) + g.addSourceLocation(outputAST.RemovePrefixes().Span()) + reset() + + if options := m.Options(); !iterx.Empty(options.Fields()) { + mdp.Options = new(descriptorpb.MethodOptions) + for option := range methodAST.Body.Options() { + reset := g.path.with(internal.MethodOptionsTag) + g.addSourceLocation(option.Span()) + reset() + } + g.options(options, mdp.Options, internal.MethodOptionsTag) + } +} + +func (g *generator) options(v ir.MessageValue, target proto.Message, sourcePathElement int32) { + target.ProtoReflect().SetUnknown(v.Marshal(nil, nil)) + g.messageValueSourceCodeInfo(v, sourcePathElement) +} + +func (g *generator) messageValueSourceCodeInfo(v ir.MessageValue, sourcePath ...int32) { + for field := range v.Fields() { + var optionSpanIndex int32 + for optionSpan := range seq.Values(field.OptionSpans()) { + if optionSpan == nil { + continue + } + + if messageField := field.AsMessage(); !messageField.IsZero() { + g.messageValueSourceCodeInfo(messageField, append(sourcePath, field.Field().Number())...) + continue + } + + span := optionSpan.Span() + // For declarations with bodies, e.g. messages, enums, services, methods, files, + // leading and trailing comments are attributed on the option declarations based on + // the option keyword and semicolon, respectively, e.g. + // + // message Foo { + // // Leading comment for the following option declaration, (a) = 10. + // option (a) = 10; + // option (b) = 20; // Trailing comment for the option declaration (b) = 20. + // } + // + // However, the optionSpan in the IR does not capture the keyword and semicolon + // tokens. 
In addition to the comments, the span including the option keyword and + // semicolon is needed for the source location. + // + // So this hack checks the non-skippable token directly before and after the + // optionSpan for the option keyword and semicolon tokens respectively. + // + // For declarations with compact options, e.g. fields, enum values, there are no + // comments attributed to the option spans, e.g. + // + // message Foo { + // string name = 1 [ + // // This is dropped. + // (c) = 15, // This is also dropped. + // ] + // } + // + var checkCommentTokens []token.ID + keyword, semicolon := g.optionKeywordAndSemicolon(span) + if !keyword.IsZero() && !semicolon.IsZero() { + checkCommentTokens = []token.ID{keyword.ID(), semicolon.ID()} + span = source.Between(keyword.Span(), semicolon.Span()) + } + + if field.Field().IsRepeated() { + reset := g.path.with(append(sourcePath, field.Field().Number(), optionSpanIndex)...) + g.addSourceLocation(span, checkCommentTokens...) + reset() + optionSpanIndex++ + } else { + reset := g.path.with(append(sourcePath, field.Field().Number())...) + g.addSourceLocation(span, checkCommentTokens...) + reset() + } + } + } +} + +// optionKeywordAndSemicolon is a helper function that checks the non-skippable tokens +// before and after the given span. If the non-skippable token before is the option keyword +// and the non-skippable token after is the semicolon, then both are returned. 
+func (g *generator) optionKeywordAndSemicolon(optionSpan source.Span) (token.Token, token.Token) { + _, start := g.currentFile.AST().Stream().Around(optionSpan.Start) + before := token.NewCursorAt(start) + prev := before.Prev() + if prev.Keyword() != keyword.Option { + return token.Zero, token.Zero + } + _, end := g.currentFile.AST().Stream().Around(optionSpan.End) + after := token.NewCursorAt(end) + next := after.Next() + if next.Keyword() != keyword.Semi { + return token.Zero, token.Zero + } + return prev, next +} + +func (g *generator) rangeSourceCodeInfo(rangeAST ast.ExprAny, baseTag, startTag, endTag, index int32) { + reset := g.path.with(baseTag, index) + defer reset() + g.addSourceLocation(rangeAST.Span()) + + var startSpan, endSpan source.Span + switch rangeAST.Kind() { + case ast.ExprKindLiteral, ast.ExprKindPath: + startSpan = rangeAST.Span() + endSpan = rangeAST.Span() + case ast.ExprKindRange: + start, end := rangeAST.AsRange().Bounds() + startSpan = start.Span() + endSpan = end.Span() + } + + if startTag != 0 { + reset := g.path.with(startTag) + g.addSourceLocation(startSpan) + reset() + } + + if endTag != 0 { + reset := g.path.with(endTag) + g.addSourceLocation(endSpan) + reset() + } +} + +// addSourceLocationWithSourcePathElements is a helper that adds a new source location for +// the given span, source path elements, and comment tokens, then resets the path immediately. +func (g *generator) addSourceLocationWithSourcePathElements( + span source.Span, + sourcePathElements []int32, + checkForComments ...token.ID, +) { + reset := g.path.with(sourcePathElements...) + defer reset() + + g.addSourceLocation(span, checkForComments...) +} + +// addSourceLocation adds the source code info location based on the current path tracked +// by the [generator]. It also checks the given token IDs for comments. 
+func (g *generator) addSourceLocation(span source.Span, checkForComments ...token.ID) { + if g.sourceCodeInfo == nil || span.IsZero() { + return + } + + location := new(descriptorpb.SourceCodeInfo_Location) + g.sourceCodeInfo.Location = append(g.sourceCodeInfo.Location, location) + + location.Span = locationSpan(span) + location.Path = g.path.clone() + + // Comments are merged across the provided [token.ID]s. + for _, id := range checkForComments { + comments, ok := g.commentTracker.attributed[id] + if !ok { + continue + } + if leadingComment := comments.leadingComment(); leadingComment != "" { + location.LeadingComments = addr(leadingComment) + } + if trailingComment := comments.trailingComment(); trailingComment != "" { + location.TrailingComments = addr(trailingComment) + } + if detachedComments := comments.detachedComments(); len(detachedComments) > 0 { + location.LeadingDetachedComments = detachedComments + } + } +} + +// addr is a helper for creating a pointer out of any type, because Go is +// missing the syntax &"foo", etc. +func addr[T any](v T) *T { return &v } + +// locationSpan is a helper function for returning the [descriptorpb.SourceCodeInfo_Location] +// span for the given [source.Span]. +// +// The span for [descriptorpb.SourceCodeInfo_Location] always has exactly three or four: +// start line, start column, end line (optional, otherwise assumed same as start line), +// and end column. The line and column numbers are zero-based. 
+func locationSpan(span source.Span) []int32 { + start, end := span.StartLoc(), span.EndLoc() + if start.Line == end.Line { + return []int32{ + int32(start.Line) - 1, + int32(start.Column) - 1, + int32(end.Column) - 1, + } + } + return []int32{ + int32(start.Line) - 1, + int32(start.Column) - 1, + int32(end.Line) - 1, + int32(end.Column) - 1, + } +} diff --git a/experimental/fdp/sourcepath.go b/experimental/fdp/sourcepath.go new file mode 100644 index 000000000..7b78b2656 --- /dev/null +++ b/experimental/fdp/sourcepath.go @@ -0,0 +1,41 @@ +// Copyright 2020-2025 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package fdp + +import ( + "google.golang.org/protobuf/reflect/protoreflect" + + "github.com/bufbuild/protocompile/internal" +) + +// path is an extension of [protoreflect.SourcePath] to provide an API for path tracking. +type path protoreflect.SourcePath + +// clone returns a copy of the currently tracked source path. +func (p *path) clone() protoreflect.SourcePath { + return internal.ClonePath(protoreflect.SourcePath(*p)) +} + +// with adds the given elements to the tracked path and returns a reset function. The reset +// trims the length of the given elements off the tracked path. It is the caller's +// responsibility to ensure that reset is called on a valid path length. +func (p *path) with(elements ...int32) func() { + *p = append(*p, elements...) 
+ return func() { + if len(*p) > 0 { + *p = (*p)[:len(*p)-len(elements)] + } + } +} diff --git a/experimental/ir/fdp.go b/experimental/ir/fdp.go deleted file mode 100644 index b3eaf3efc..000000000 --- a/experimental/ir/fdp.go +++ /dev/null @@ -1,477 +0,0 @@ -// Copyright 2020-2025 Buf Technologies, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package ir - -import ( - "math" - "slices" - "strconv" - - descriptorv1 "buf.build/gen/go/bufbuild/protodescriptor/protocolbuffers/go/buf/descriptor/v1" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/descriptorpb" - - "github.com/bufbuild/protocompile/experimental/ast/predeclared" - "github.com/bufbuild/protocompile/experimental/ast/syntax" - "github.com/bufbuild/protocompile/experimental/ir/presence" - "github.com/bufbuild/protocompile/experimental/seq" - "github.com/bufbuild/protocompile/internal/ext/cmpx" - "github.com/bufbuild/protocompile/internal/ext/iterx" - "github.com/bufbuild/protocompile/internal/ext/slicesx" -) - -// DescriptorSetBytes generates a FileDescriptorSet for the given files, and returns the -// result as an encoded byte slice. -// -// The resulting FileDescriptorSet is always fully linked: it contains all dependencies except -// the WKTs, and all names are fully-qualified. 
-func DescriptorSetBytes(files []*File, options ...DescriptorOption) ([]byte, error) { - var dg descGenerator - for _, opt := range options { - if opt != nil { - opt(&dg) - } - } - - fds := new(descriptorpb.FileDescriptorSet) - dg.files(files, fds) - return proto.Marshal(fds) -} - -// DescriptorProtoBytes generates a single FileDescriptorProto for file, and returns the -// result as an encoded byte slice. -// -// The resulting FileDescriptorProto is fully linked: all names are fully-qualified. -func DescriptorProtoBytes(file *File, options ...DescriptorOption) ([]byte, error) { - var dg descGenerator - for _, opt := range options { - if opt != nil { - opt(&dg) - } - } - - fdp := new(descriptorpb.FileDescriptorProto) - dg.file(file, fdp) - return proto.Marshal(fdp) -} - -// DescriptorOption is an option to pass to [DescriptorSetBytes] or [DescriptorProtoBytes]. -type DescriptorOption func(*descGenerator) - -// IncludeDebugInfo sets whether or not to include google.protobuf.SourceCodeInfo in -// the output. -func IncludeSourceCodeInfo(flag bool) DescriptorOption { - return func(dg *descGenerator) { - dg.includeDebugInfo = flag - } -} - -// ExcludeFiles excludes the given files from the output of [DescriptorSetBytes]. -func ExcludeFiles(exclude func(*File) bool) DescriptorOption { - return func(dg *descGenerator) { - dg.exclude = exclude - } -} - -type descGenerator struct { - currentFile *File - includeDebugInfo bool - exclude func(*File) bool - - sourceCodeInfo *descriptorpb.SourceCodeInfo - sourceCodeInfoExtn *descriptorv1.SourceCodeInfoExtension -} - -func (dg *descGenerator) files(files []*File, fds *descriptorpb.FileDescriptorSet) { - // Build up all of the imported files. We can't just pull out the transitive - // imports for each file because we want the result to be sorted - // topologically. 
- for file := range topoSort(files) { - if dg.exclude != nil && dg.exclude(file) { - continue - } - - fdp := new(descriptorpb.FileDescriptorProto) - fds.File = append(fds.File, fdp) - - dg.file(file, fdp) - } -} - -func (dg *descGenerator) file(file *File, fdp *descriptorpb.FileDescriptorProto) { - dg.currentFile = file - if dg.includeDebugInfo { - dg.sourceCodeInfo = new(descriptorpb.SourceCodeInfo) - fdp.SourceCodeInfo = dg.sourceCodeInfo - - dg.sourceCodeInfoExtn = new(descriptorv1.SourceCodeInfoExtension) - proto.SetExtension(dg.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension, dg.sourceCodeInfoExtn) - } - - fdp.Name = addr(file.Path()) - fdp.Package = addr(string(file.Package())) - - if file.Syntax().IsEdition() { - fdp.Syntax = addr("editions") - fdp.Edition = descriptorpb.Edition(file.Syntax()).Enum() - } else { - fdp.Syntax = addr(file.Syntax().String()) - } - - if dg.sourceCodeInfoExtn != nil { - dg.sourceCodeInfoExtn.IsSyntaxUnspecified = file.AST().Syntax().IsZero() - } - - // Canonicalize import order so that it does not change whenever we refactor - // internal structures. 
- imports := seq.ToSlice(file.Imports()) - slices.SortFunc(imports, cmpx.Key(func(imp Import) int { - return imp.Decl.KeywordToken().Span().Start - })) - for i, imp := range imports { - if !imp.Option { - fdp.Dependency = append(fdp.Dependency, imp.Path()) - if imp.Public { - fdp.PublicDependency = append(fdp.PublicDependency, int32(i)) - } - if imp.Weak { - fdp.WeakDependency = append(fdp.WeakDependency, int32(i)) - } - } else if imp.Option { - fdp.OptionDependency = append(fdp.OptionDependency, imp.Path()) - } - - if dg.sourceCodeInfoExtn != nil && !imp.Used { - dg.sourceCodeInfoExtn.UnusedDependency = append(dg.sourceCodeInfoExtn.UnusedDependency, int32(i)) - } - } - - for ty := range seq.Values(file.Types()) { - if ty.IsEnum() { - edp := new(descriptorpb.EnumDescriptorProto) - fdp.EnumType = append(fdp.EnumType, edp) - dg.enum(ty, edp) - continue - } - - mdp := new(descriptorpb.DescriptorProto) - fdp.MessageType = append(fdp.MessageType, mdp) - dg.message(ty, mdp) - } - - for service := range seq.Values(file.Services()) { - sdp := new(descriptorpb.ServiceDescriptorProto) - fdp.Service = append(fdp.Service, sdp) - dg.service(service, sdp) - } - - for extn := range seq.Values(file.Extensions()) { - fd := new(descriptorpb.FieldDescriptorProto) - fdp.Extension = append(fdp.Extension, fd) - dg.field(extn, fd) - } - - if options := file.Options(); !iterx.Empty(options.Fields()) { - fdp.Options = new(descriptorpb.FileOptions) - dg.options(options, fdp.Options) - } - - if dg.sourceCodeInfoExtn != nil && iterx.Empty2(dg.sourceCodeInfoExtn.ProtoReflect().Range) { - proto.ClearExtension(dg.sourceCodeInfo, descriptorv1.E_BufSourceCodeInfoExtension) - } -} - -func (dg *descGenerator) message(ty Type, mdp *descriptorpb.DescriptorProto) { - mdp.Name = addr(ty.Name()) - - for field := range seq.Values(ty.Members()) { - fd := new(descriptorpb.FieldDescriptorProto) - mdp.Field = append(mdp.Field, fd) - dg.field(field, fd) - } - - for extn := range seq.Values(ty.Extensions()) { - 
fd := new(descriptorpb.FieldDescriptorProto) - mdp.Extension = append(mdp.Extension, fd) - dg.field(extn, fd) - } - - for ty := range seq.Values(ty.Nested()) { - if ty.IsEnum() { - edp := new(descriptorpb.EnumDescriptorProto) - mdp.EnumType = append(mdp.EnumType, edp) - dg.enum(ty, edp) - continue - } - - nested := new(descriptorpb.DescriptorProto) - mdp.NestedType = append(mdp.NestedType, nested) - dg.message(ty, nested) - } - - for extensions := range seq.Values(ty.ExtensionRanges()) { - er := new(descriptorpb.DescriptorProto_ExtensionRange) - mdp.ExtensionRange = append(mdp.ExtensionRange, er) - - start, end := extensions.Range() - er.Start = addr(start) - er.End = addr(end + 1) // Exclusive. - - if options := extensions.Options(); !iterx.Empty(options.Fields()) { - er.Options = new(descriptorpb.ExtensionRangeOptions) - dg.options(options, er.Options) - } - } - - for reserved := range seq.Values(ty.ReservedRanges()) { - rr := new(descriptorpb.DescriptorProto_ReservedRange) - mdp.ReservedRange = append(mdp.ReservedRange, rr) - - start, end := reserved.Range() - rr.Start = addr(start) - rr.End = addr(end + 1) // Exclusive. - } - - for name := range seq.Values(ty.ReservedNames()) { - mdp.ReservedName = append(mdp.ReservedName, name.Name()) - } - - for oneof := range seq.Values(ty.Oneofs()) { - odp := new(descriptorpb.OneofDescriptorProto) - mdp.OneofDecl = append(mdp.OneofDecl, odp) - dg.oneof(oneof, odp) - } - - if dg.currentFile.Syntax() == syntax.Proto3 { - var names syntheticNames - - // Only now that we have added all of the normal oneofs do we add the - // synthetic oneofs. 
- for i, field := range seq.All(ty.Members()) { - if field.Presence() != presence.Explicit || - !field.Oneof().IsZero() { - continue - } - - fdp := mdp.Field[i] - fdp.Proto3Optional = addr(true) - fdp.OneofIndex = addr(int32(len(mdp.OneofDecl))) - mdp.OneofDecl = append(mdp.OneofDecl, &descriptorpb.OneofDescriptorProto{ - Name: addr(names.generate(field.Name(), ty)), - }) - } - } - - if options := ty.Options(); !iterx.Empty(options.Fields()) { - mdp.Options = new(descriptorpb.MessageOptions) - dg.options(options, mdp.Options) - } - - switch exported, explicit := ty.IsExported(); { - case !explicit: - break - case exported: - mdp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_EXPORT.Enum() - case !exported: - mdp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_LOCAL.Enum() - } -} - -var predeclaredToFDPType = []descriptorpb.FieldDescriptorProto_Type{ - predeclared.Int32: descriptorpb.FieldDescriptorProto_TYPE_INT32, - predeclared.Int64: descriptorpb.FieldDescriptorProto_TYPE_INT64, - predeclared.UInt32: descriptorpb.FieldDescriptorProto_TYPE_UINT32, - predeclared.UInt64: descriptorpb.FieldDescriptorProto_TYPE_UINT64, - predeclared.SInt32: descriptorpb.FieldDescriptorProto_TYPE_SINT32, - predeclared.SInt64: descriptorpb.FieldDescriptorProto_TYPE_SINT64, - - predeclared.Fixed32: descriptorpb.FieldDescriptorProto_TYPE_FIXED32, - predeclared.Fixed64: descriptorpb.FieldDescriptorProto_TYPE_FIXED64, - predeclared.SFixed32: descriptorpb.FieldDescriptorProto_TYPE_SFIXED32, - predeclared.SFixed64: descriptorpb.FieldDescriptorProto_TYPE_SFIXED64, - - predeclared.Float32: descriptorpb.FieldDescriptorProto_TYPE_FLOAT, - predeclared.Float64: descriptorpb.FieldDescriptorProto_TYPE_DOUBLE, - - predeclared.Bool: descriptorpb.FieldDescriptorProto_TYPE_BOOL, - predeclared.String: descriptorpb.FieldDescriptorProto_TYPE_STRING, - predeclared.Bytes: descriptorpb.FieldDescriptorProto_TYPE_BYTES, -} - -func (dg *descGenerator) field(f Member, fdp 
*descriptorpb.FieldDescriptorProto) { - fdp.Name = addr(f.Name()) - fdp.Number = addr(f.Number()) - - switch f.Presence() { - case presence.Explicit, presence.Implicit, presence.Shared: - fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_OPTIONAL.Enum() - case presence.Repeated: - fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_REPEATED.Enum() - case presence.Required: - fdp.Label = descriptorpb.FieldDescriptorProto_LABEL_REQUIRED.Enum() - } - - if ty := f.Element(); !ty.IsZero() { - if kind, _ := slicesx.Get(predeclaredToFDPType, ty.Predeclared()); kind != 0 { - fdp.Type = kind.Enum() - } else { - fdp.TypeName = addr(string(ty.FullName().ToAbsolute())) - - switch { - case ty.IsEnum(): - fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_ENUM.Enum() - case f.IsGroup(): - fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_GROUP.Enum() - default: - fdp.Type = descriptorpb.FieldDescriptorProto_TYPE_MESSAGE.Enum() - } - } - } - - if f.IsExtension() && f.Container().FullName() != "" { - fdp.Extendee = addr(string(f.Container().FullName().ToAbsolute())) - } - - if oneof := f.Oneof(); !oneof.IsZero() { - fdp.OneofIndex = addr(int32(oneof.Index())) - } - - if options := f.Options(); !iterx.Empty(options.Fields()) { - fdp.Options = new(descriptorpb.FieldOptions) - dg.options(options, fdp.Options) - } - - fdp.JsonName = addr(f.JSONName()) - - d := f.PseudoOptions().Default - if !d.IsZero() { - if v, ok := d.AsBool(); ok { - fdp.DefaultValue = addr(strconv.FormatBool(v)) - } else if v, ok := d.AsInt(); ok { - fdp.DefaultValue = addr(strconv.FormatInt(v, 10)) - } else if v, ok := d.AsUInt(); ok { - fdp.DefaultValue = addr(strconv.FormatUint(v, 10)) - } else if v, ok := d.AsFloat(); ok { - switch { - case math.IsInf(v, 1): - fdp.DefaultValue = addr("inf") - case math.IsInf(v, -1): - fdp.DefaultValue = addr("-inf") - case math.IsNaN(v): - fdp.DefaultValue = addr("nan") // Goodbye NaN payload. 
:( - default: - fdp.DefaultValue = addr(strconv.FormatFloat(v, 'g', -1, 64)) - } - } else if v, ok := d.AsString(); ok { - fdp.DefaultValue = addr(v) - } - } -} - -func (dg *descGenerator) oneof(o Oneof, odp *descriptorpb.OneofDescriptorProto) { - odp.Name = addr(o.Name()) - - if options := o.Options(); !iterx.Empty(options.Fields()) { - odp.Options = new(descriptorpb.OneofOptions) - dg.options(options, odp.Options) - } -} - -func (dg *descGenerator) enum(ty Type, edp *descriptorpb.EnumDescriptorProto) { - edp.Name = addr(ty.Name()) - - for field := range seq.Values(ty.Members()) { - evd := new(descriptorpb.EnumValueDescriptorProto) - edp.Value = append(edp.Value, evd) - dg.enumValue(field, evd) - } - - for reserved := range seq.Values(ty.ReservedRanges()) { - rr := new(descriptorpb.EnumDescriptorProto_EnumReservedRange) - edp.ReservedRange = append(edp.ReservedRange, rr) - - start, end := reserved.Range() - rr.Start = addr(start) - rr.End = addr(end) // Inclusive, not exclusive like the one for messages! 
- } - - for name := range seq.Values(ty.ReservedNames()) { - edp.ReservedName = append(edp.ReservedName, name.Name()) - } - - if options := ty.Options(); !iterx.Empty(options.Fields()) { - edp.Options = new(descriptorpb.EnumOptions) - dg.options(options, edp.Options) - } - - switch exported, explicit := ty.IsExported(); { - case !explicit: - break - case exported: - edp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_EXPORT.Enum() - case !exported: - edp.Visibility = descriptorpb.SymbolVisibility_VISIBILITY_LOCAL.Enum() - } -} - -func (dg *descGenerator) enumValue(f Member, evdp *descriptorpb.EnumValueDescriptorProto) { - evdp.Name = addr(f.Name()) - evdp.Number = addr(f.Number()) - - if options := f.Options(); !iterx.Empty(options.Fields()) { - evdp.Options = new(descriptorpb.EnumValueOptions) - dg.options(options, evdp.Options) - } -} - -func (dg *descGenerator) service(s Service, sdp *descriptorpb.ServiceDescriptorProto) { - sdp.Name = addr(s.Name()) - - for method := range seq.Values(s.Methods()) { - mdp := new(descriptorpb.MethodDescriptorProto) - sdp.Method = append(sdp.Method, mdp) - dg.method(method, mdp) - } - - if options := s.Options(); !iterx.Empty(options.Fields()) { - sdp.Options = new(descriptorpb.ServiceOptions) - dg.options(options, sdp.Options) - } -} - -func (dg *descGenerator) method(m Method, mdp *descriptorpb.MethodDescriptorProto) { - mdp.Name = addr(m.Name()) - - in, inStream := m.Input() - mdp.InputType = addr(string(in.FullName())) - mdp.ClientStreaming = addr(inStream) - - out, outStream := m.Output() - mdp.OutputType = addr(string(out.FullName())) - mdp.ServerStreaming = addr(outStream) - - if options := m.Options(); !iterx.Empty(options.Fields()) { - mdp.Options = new(descriptorpb.MethodOptions) - dg.options(options, mdp.Options) - } -} - -func (dg *descGenerator) options(v MessageValue, target proto.Message) { - target.ProtoReflect().SetUnknown(v.Marshal(nil, nil)) -} - -// addr is a helper for creating a pointer out of any type, 
because Go is -// missing the syntax &"foo", etc. -func addr[T any](v T) *T { return &v } diff --git a/experimental/ir/ir_file.go b/experimental/ir/ir_file.go index 1abb004b0..9924afbab 100644 --- a/experimental/ir/ir_file.go +++ b/experimental/ir/ir_file.go @@ -378,9 +378,9 @@ func (f *File) FindSymbol(fqn FullName) Symbol { unsafex.BytesAlias[[]byte](string(fqn)))) } -// topoSort sorts a graph of [File]s according to their dependency graph, +// TopoSort sorts a graph of [File]s according to their dependency graph, // in topological order. Files with no dependencies are yielded first. -func topoSort(files []*File) iter.Seq[*File] { +func TopoSort(files []*File) iter.Seq[*File] { // NOTE: This cannot panic because Files, by construction, do not contain // graph cycles. return toposort.Sort( diff --git a/experimental/ir/ir_member.go b/experimental/ir/ir_member.go index edd1afe1a..12631c8ff 100644 --- a/experimental/ir/ir_member.go +++ b/experimental/ir/ir_member.go @@ -46,21 +46,22 @@ import ( type Member id.Node[Member, *File, *rawMember] type rawMember struct { - featureInfo *rawFeatureInfo - elem Ref[Type] - number int32 - extendee id.ID[Extend] - fqn intern.ID - name intern.ID - def id.ID[ast.DeclDef] - parent id.ID[Type] - features id.ID[FeatureSet] - options id.ID[Value] - oneof int32 - optionTargets uint32 - jsonName intern.ID - isGroup bool - numberOk bool + featureInfo *rawFeatureInfo + elem Ref[Type] + number int32 + extendee id.ID[Extend] + fqn intern.ID + name intern.ID + syntheticOneofName intern.ID + def id.ID[ast.DeclDef] + parent id.ID[Type] + features id.ID[FeatureSet] + options id.ID[Value] + oneof int32 + optionTargets uint32 + jsonName intern.ID + isGroup bool + numberOk bool } // IsMessageField returns whether this is a non-extension message field. @@ -390,6 +391,19 @@ func (m Member) Deprecated() Value { return Value{} } +// SyntheticOneofName returns the name of the corresponding synthetic oneof for this +// member, if there should be one. 
+// +// For proto3 sources, a oneof is synthesized to track explicit optional presence of a +// field. For details on generating the synthesized name, see the docs for [syntheticNames] +// and/or refer to https://protobuf.com/docs/descriptors#synthetic-oneofs. +func (m Member) SyntheticOneofName() string { + if m.IsZero() { + return "" + } + return m.Context().session.intern.Value(m.Raw().syntheticOneofName) +} + // CanTarget returns whether this message field can be set as an option for the // given option target type. // @@ -716,6 +730,7 @@ type ReservedName struct { type rawReservedName struct { ast ast.ExprAny name intern.ID + decl id.ID[ast.DeclRange] } // AST returns the expression that this name was evaluated from, if known. @@ -726,6 +741,16 @@ func (r ReservedName) AST() ast.ExprAny { return r.raw.ast } +// DeclAST returns the declaration this name came from. Multiple names may +// have the same declaration. +func (r ReservedName) DeclAST() ast.DeclRange { + if r.IsZero() { + return ast.DeclRange{} + } + + return id.Wrap(r.Context().AST(), r.raw.decl) +} + // Name returns the name (i.e., an identifier) that was reserved. 
func (r ReservedName) Name() string { if r.IsZero() { diff --git a/experimental/ir/ir_test.go b/experimental/ir/ir_test.go index b09392a5f..1df1235c8 100644 --- a/experimental/ir/ir_test.go +++ b/experimental/ir/ir_test.go @@ -32,6 +32,7 @@ import ( "gopkg.in/yaml.v3" "github.com/bufbuild/protocompile/experimental/ast/predeclared" + "github.com/bufbuild/protocompile/experimental/fdp" "github.com/bufbuild/protocompile/experimental/incremental" "github.com/bufbuild/protocompile/experimental/incremental/queries" "github.com/bufbuild/protocompile/experimental/ir" @@ -202,9 +203,9 @@ func TestIR(t *testing.T) { irs = slices.DeleteFunc(irs, func(f *ir.File) bool { return f == nil }) if test.Descriptor { - bytes, err := ir.DescriptorSetBytes(irs, - ir.IncludeSourceCodeInfo(test.SourceCodeInfo), - ir.ExcludeFiles((*ir.File).IsDescriptorProto), + bytes, err := fdp.DescriptorSetBytes(irs, + fdp.IncludeSourceCodeInfo(test.SourceCodeInfo), + fdp.ExcludeFiles((*ir.File).IsDescriptorProto), ) require.NoError(t, err) diff --git a/experimental/ir/ir_value.go b/experimental/ir/ir_value.go index 36ec26f67..0bc1608db 100644 --- a/experimental/ir/ir_value.go +++ b/experimental/ir/ir_value.go @@ -143,13 +143,13 @@ func (v Value) OptionSpans() seq.Indexer[source.Spanner] { slice = v.Raw().exprs } - return seq.NewFixedSlice(slice, func(_ int, p id.Dyn[ast.ExprAny, ast.ExprKind]) source.Spanner { + return seq.NewFixedSlice(slice, func(i int, p id.Dyn[ast.ExprAny, ast.ExprKind]) source.Spanner { c := v.Context().AST() expr := id.WrapDyn(c, p) if field := expr.AsField(); !field.IsZero() { return field } - return source.Join(ast.ExprPath{Path: v.Raw().optionPaths[0].In(c)}, expr) + return source.Join(ast.ExprPath{Path: v.Raw().optionPaths[i].In(c)}, expr) }) } diff --git a/experimental/ir/lower_resolve.go b/experimental/ir/lower_resolve.go index 2f7bb482a..5ad929443 100644 --- a/experimental/ir/lower_resolve.go +++ b/experimental/ir/lower_resolve.go @@ -38,8 +38,20 @@ func resolveNames(file 
*File, r *report.Report) { for ty := range seq.Values(file.AllTypes()) { if ty.IsMessage() { + var names syntheticNames for field := range seq.Values(ty.Members()) { resolveFieldType(field, r) + + // For proto3 sources, we need to resolve the synthetic oneof names for fields with + // explicit optional presence. See the docs for [Member.SyntheticOneofName] for details. + if file.syntax == syntax.Proto3 && field.Presence() == presence.Explicit { + if !field.Oneof().IsZero() { + continue + } + field.Raw().syntheticOneofName = file.session.intern.Intern( + names.generate(field.Name(), field.Parent()), + ) + } } } } diff --git a/experimental/ir/lower_walk.go b/experimental/ir/lower_walk.go index 129ae0a93..3f5011f0d 100644 --- a/experimental/ir/lower_walk.go +++ b/experimental/ir/lower_walk.go @@ -206,6 +206,7 @@ func (w *walker) newType(def ast.DeclDef, parent any) Type { ty.Raw().reservedNames = append(ty.Raw().reservedNames, rawReservedName{ ast: v, name: ty.Context().session.intern.Intern(name), + decl: rangeDecl.ID(), }) continue } diff --git a/experimental/ir/synthetic.go b/experimental/ir/synthetic.go index 69adaed6a..9264bf8bf 100644 --- a/experimental/ir/synthetic.go +++ b/experimental/ir/synthetic.go @@ -75,7 +75,7 @@ func (sn *syntheticNames) generate(candidate string, message Type) string { return sn.generateIn(candidate, &message.Context().session.intern) } -// generateIn is the part of [syntheticNames.generate] that actually constructs +// generateIn is the part of [SyntheticNames.generate] that actually constructs // the string. // // it is outlined so that it can be tested separately. diff --git a/experimental/ir/testdata/comments/attribution.proto b/experimental/ir/testdata/comments/attribution.proto new file mode 100644 index 000000000..03537950c --- /dev/null +++ b/experimental/ir/testdata/comments/attribution.proto @@ -0,0 +1,96 @@ +// Copyright 2020-2025 Buf Technologies, Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//% descriptor: true +//% source_code_info: true +edition = "2023"; + +// This is a leading comment for the package. +package buf.test; +// This is still a trailing comment for the package. + +// This is a leading comment for the java_package option source code info, but not the +// top-level option source code info. +option java_package = "build.buf.protocompile"; // Similarly, this is a trailing comment. +option java_multiple_files = true; + +// This is a leading comment for the import source code info, but not for the corresponding +// public import source code info. +import public "google/protobuf/descriptor.proto"; // Same with this trailing comment. + +// This, as expected, is a leading comment for Foo. +message /*this is thrown away*/ Foo { + // This is the TRAILING comment for Foo. (It is NOT + // a detached comment for baz.) + + // Leading comment for message option + option (a) = 10; + option (a) = 20; // Trailing comment for the second message option + + // This is a leading comment for baz. + string /*this is also thrown away*/ baz = 1; + // This is a trailing comment for baz. + + // This is a leading comment for the reserved range. + reserved 2, 3 to 4,/*comments between expressions are dropped*/ 5 to 10; // This is a trailing comment for the reserved range. + + // This is a leading comment for the reserved names declaration. 
+ reserved foo, /*this gets dropped*/ bar; // This is a trailing comment for the reserved range. + + repeated int32 options = 11 /* This is dropped. */ [ + // Comments for option messages are dropped. + features.repeated_field_encoding = PACKED, // This is also dropped + (c) = 15, + (c) = 16 + ]; + + // This is also thrown away. +} +// This is NOT a trailing comment. It's also not considered +// a detached comment for Bar. It is discarded. + +// This IS a detached comment for Bar. + +// This is also a detached comment for Bar. + + + + +/* + * A leading block comment for Bar. + */ +message Bar {} + +// This is the leading comment for enum Baz. +enum Baz { // This is a trailing comment for enum Baz. + BAZ_ONE = 0; +} // This is NOT a trailing comment for Baz. It is discarded. + +// This is the leading comment for service FooService. +service FooService { // This is a trailing comment for service FooService. + // Leading comment for method FooForBar. + rpc FooForBar(Bar) returns (Foo); // Trailing comment for FooForBar. + + // Leading comment for method BarForBar. + rpc BarForBar(Bar) returns (Bar) /* This is thrown away */ { // Trailing comment for BarForBar. + }; // This is NOT a trailing comment for BarForBar. It is discarded. +} // This is NOT a trailing comment for FooService. It is discarded. 
+ +extend google.protobuf.FieldOptions { + repeated int32 c = 1000; +} + +extend google.protobuf.MessageOptions { + repeated int32 a = 1000; +} diff --git a/experimental/ir/testdata/comments/attribution.proto.fds.yaml b/experimental/ir/testdata/comments/attribution.proto.fds.yaml new file mode 100644 index 000000000..0ae23f836 --- /dev/null +++ b/experimental/ir/testdata/comments/attribution.proto.fds.yaml @@ -0,0 +1,177 @@ +file: +- name: "testdata/comments/attribution.proto" + package: "buf.test" + dependency: ["google/protobuf/descriptor.proto"] + public_dependency: [0] + message_type: + - name: "Foo" + field: + - name: "baz" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + json_name: "baz" + - name: "options" + number: 11 + label: LABEL_REPEATED + type: TYPE_INT32 + json_name: "options" + options: + features.repeated_field_encoding: PACKED + $unknown: "1000: {`0f10`}" + options.$unknown: "1000: {`0a14`}" + reserved_range: + - { start: 2, end: 3 } + - { start: 3, end: 5 } + - { start: 5, end: 11 } + reserved_name: ["foo", "bar"] + - name: "Bar" + enum_type: [{ name: "Baz", value: [{ name: "BAZ_ONE", number: 0 }] }] + service: + - name: "FooService" + method: + - name: "FooForBar" + input_type: "buf.test.Bar" + output_type: "buf.test.Foo" + client_streaming: false + server_streaming: false + - name: "BarForBar" + input_type: "buf.test.Bar" + output_type: "buf.test.Bar" + client_streaming: false + server_streaming: false + extension: + - name: "c" + number: 1000 + label: LABEL_REPEATED + type: TYPE_INT32 + extendee: ".google.protobuf.FieldOptions" + json_name: "c" + - name: "a" + number: 1000 + label: LABEL_REPEATED + type: TYPE_INT32 + extendee: ".google.protobuf.MessageOptions" + json_name: "a" + options: { java_package: "build.buf.protocompile", java_multiple_files: true } + source_code_info.location: + - span: [16, 0, 95, 1] + - path: [12] + span: [16, 0, 17] + leading_comments: "% descriptor: true\n% source_code_info: true\n" + 
leading_detached_comments: + - " Copyright 2020-2025 Buf Technologies, Inc.\n\n Licensed under the Apache License, Version 2.0 (the \"License\");\n you may not use this file except in compliance with the License.\n You may obtain a copy of the License at\n\n http://www.apache.org/licenses/LICENSE-2.0\n\n Unless required by applicable law or agreed to in writing, software\n distributed under the License is distributed on an \"AS IS\" BASIS,\n WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n See the License for the specific language governing permissions and\n limitations under the License.\n" + - path: [2] + span: [19, 0, 17] + leading_comments: " This is a leading comment for the package.\n" + trailing_comments: " This is still a trailing comment for the package.\n" + - { path: [8], span: [24, 0, 47] } + - path: [8, 1] + span: [24, 0, 47] + leading_comments: " This is a leading comment for the java_package option source code info, but not the\n top-level option source code info.\n" + trailing_comments: " Similarly, this is a trailing comment." + - { path: [8], span: [25, 0, 34] } + - { path: [8, 10], span: [25, 0, 34] } + - path: [3, 0] + span: [29, 0, 49] + leading_comments: " This is a leading comment for the import source code info, but not for the corresponding\n public import source code info.\n" + trailing_comments: " Same with this trailing comment." + - { path: [10, 0], span: [29, 7, 13] } + - path: [4, 0] + span: [32, 0, 58, 1] + leading_comments: " This, as expected, is a leading comment for Foo.\n" + trailing_comments: " This is the TRAILING comment for Foo. 
(It is NOT\n a detached comment for baz.)\n" + - { path: [4, 0, 1], span: [32, 32, 35] } + - path: [4, 0, 7, 1000, 0] + span: [37, 2, 18] + leading_comments: " Leading comment for message option\n" + - { path: [4, 0, 7], span: [37, 2, 18] } + - path: [4, 0, 7, 1000, 1] + span: [38, 2, 18] + trailing_comments: " Trailing comment for the second message option" + - { path: [4, 0, 7], span: [38, 2, 18] } + - { path: [4, 0, 2, 0, 5], span: [41, 2, 8] } + - path: [4, 0, 2, 0] + span: [41, 2, 46] + leading_comments: " This is a leading comment for baz.\n" + trailing_comments: " This is a trailing comment for baz.\n" + - { path: [4, 0, 2, 0, 1], span: [41, 38, 41] } + - { path: [4, 0, 2, 0, 3], span: [41, 44, 45] } + - path: [4, 0, 9] + span: [45, 2, 74] + leading_comments: " This is a leading comment for the reserved range.\n" + trailing_comments: " This is a trailing comment for the reserved range." + - { path: [4, 0, 9, 0], span: [45, 11, 12] } + - { path: [4, 0, 9, 0, 1], span: [45, 11, 12] } + - { path: [4, 0, 9, 0, 2], span: [45, 11, 12] } + - { path: [4, 0, 9, 1, 1], span: [45, 14, 15] } + - { path: [4, 0, 9, 1], span: [45, 14, 20] } + - { path: [4, 0, 9, 1, 2], span: [45, 19, 20] } + - { path: [4, 0, 9, 2, 1], span: [45, 66, 67] } + - { path: [4, 0, 9, 2], span: [45, 66, 73] } + - { path: [4, 0, 9, 2, 2], span: [45, 71, 73] } + - path: [4, 0, 10] + span: [48, 2, 42] + leading_comments: " This is a leading comment for the reserved names declaration.\n" + trailing_comments: " This is a trailing comment for the reserved range." 
+ - { path: [4, 0, 10, 0], span: [48, 11, 14] } + - { path: [4, 0, 10, 1], span: [48, 38, 41] } + - { path: [4, 0, 2, 1, 4], span: [50, 2, 10] } + - { path: [4, 0, 2, 1], span: [50, 2, 55, 4] } + - { path: [4, 0, 2, 1, 5], span: [50, 11, 16] } + - { path: [4, 0, 2, 1, 1], span: [50, 17, 24] } + - { path: [4, 0, 2, 1, 3], span: [50, 27, 29] } + - { path: [4, 0, 2, 1, 8], span: [50, 53, 55, 3] } + - { path: [4, 0, 2, 1, 8, 21, 3], span: [52, 4, 45] } + - { path: [4, 0, 2, 1, 8, 1000, 0], span: [53, 4, 12] } + - { path: [4, 0, 2, 1, 8, 1000, 1], span: [54, 4, 12] } + - path: [4, 1] + span: [72, 0, 14] + leading_comments: "\n A leading block comment for Bar.\n " + leading_detached_comments: + - " This IS a detached comment for Bar.\n" + - " This is also a detached comment for Bar.\n" + - { path: [4, 1, 1], span: [72, 8, 11] } + - path: [5, 0] + span: [75, 0, 77, 1] + leading_comments: " This is the leading comment for enum Baz.\n" + trailing_comments: " This is a trailing comment for enum Baz." + - { path: [5, 0, 1], span: [75, 5, 8] } + - { path: [5, 0, 2, 0, 1], span: [76, 2, 9] } + - { path: [5, 0, 2, 0], span: [76, 2, 14] } + - { path: [5, 0, 2, 0, 2], span: [76, 12, 13] } + - path: [6, 0] + span: [80, 0, 87, 1] + leading_comments: " This is the leading comment for service FooService.\n" + trailing_comments: " This is a trailing comment for service FooService." + - { path: [6, 0, 1], span: [80, 8, 18] } + - path: [6, 0, 2, 0] + span: [82, 2, 35] + leading_comments: " Leading comment for method FooForBar.\n" + trailing_comments: " Trailing comment for FooForBar." + - { path: [6, 0, 2, 0, 1], span: [82, 6, 15] } + - { path: [6, 0, 2, 0, 2], span: [82, 16, 19] } + - { path: [6, 0, 2, 0, 3], span: [82, 30, 33] } + - path: [6, 0, 2, 1] + span: [85, 2, 86, 3] + leading_comments: " Leading comment for method BarForBar.\n" + trailing_comments: " Trailing comment for BarForBar." 
+ - { path: [6, 0, 2, 1, 1], span: [85, 6, 15] } + - { path: [6, 0, 2, 1, 2], span: [85, 16, 19] } + - { path: [6, 0, 2, 1, 3], span: [85, 30, 33] } + - { path: [7], span: [89, 0, 91, 1] } + - { path: [7, 0, 2], span: [89, 7, 35] } + - { path: [7, 0, 4], span: [90, 2, 10] } + - { path: [7, 0], span: [90, 2, 26] } + - { path: [7, 0, 5], span: [90, 11, 16] } + - { path: [7, 0, 1], span: [90, 17, 18] } + - { path: [7, 0, 3], span: [90, 21, 25] } + - { path: [7], span: [93, 0, 95, 1] } + - { path: [7, 1, 2], span: [93, 7, 37] } + - { path: [7, 1, 4], span: [94, 2, 10] } + - { path: [7, 1], span: [94, 2, 26] } + - { path: [7, 1, 5], span: [94, 11, 16] } + - { path: [7, 1, 1], span: [94, 17, 18] } + - { path: [7, 1, 3], span: [94, 21, 25] } + syntax: "editions" + edition: EDITION_2023 diff --git a/experimental/ir/testdata/empty.proto.yaml.fds.yaml b/experimental/ir/testdata/empty.proto.yaml.fds.yaml index 0c97ea9ff..3d4bddeea 100644 --- a/experimental/ir/testdata/empty.proto.yaml.fds.yaml +++ b/experimental/ir/testdata/empty.proto.yaml.fds.yaml @@ -2,6 +2,7 @@ file: - name: "empty.proto" package: "" source_code_info: + location: [{ span: [0, 0, 0] }] (buf.descriptor.v1.buf_source_code_info_extension): is_syntax_unspecified: true syntax: "proto2" diff --git a/experimental/ir/testdata/imports/unused.proto.yaml.fds.yaml b/experimental/ir/testdata/imports/unused.proto.yaml.fds.yaml index bd4c6be5a..7ad2166d2 100644 --- a/experimental/ir/testdata/imports/unused.proto.yaml.fds.yaml +++ b/experimental/ir/testdata/imports/unused.proto.yaml.fds.yaml @@ -2,26 +2,50 @@ file: - name: "d.proto" package: "buf.test" message_type: [{ name: "D" }] - source_code_info: {} + source_code_info.location: + - span: [0, 0, 3, 12] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [4, 0], span: [3, 0, 12] } + - { path: [4, 0, 1], span: [3, 8, 9] } syntax: "proto3" - name: "p.proto" package: "buf.test" message_type: [{ name: "P" }] - source_code_info: {} + 
source_code_info.location: + - span: [0, 0, 3, 12] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [4, 0], span: [3, 0, 12] } + - { path: [4, 0, 1], span: [3, 8, 9] } syntax: "proto3" - name: "c.proto" package: "buf.test" dependency: ["p.proto"] public_dependency: [0] message_type: [{ name: "C" }] - source_code_info: {} + source_code_info.location: + - span: [0, 0, 5, 24] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [4, 0], span: [3, 0, 12] } + - { path: [4, 0, 1], span: [3, 8, 9] } + - { path: [3, 0], span: [5, 0, 24] } + - { path: [10, 0], span: [5, 7, 13] } syntax: "proto3" - name: "b.proto" package: "buf.test" dependency: ["p.proto"] public_dependency: [0] message_type: [{ name: "B" }] - source_code_info: {} + source_code_info.location: + - span: [0, 0, 5, 24] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [4, 0], span: [3, 0, 12] } + - { path: [4, 0, 1], span: [3, 8, 9] } + - { path: [3, 0], span: [5, 0, 24] } + - { path: [10, 0], span: [5, 7, 13] } syntax: "proto3" - name: "a1.proto" package: "buf.test" @@ -47,13 +71,42 @@ file: type: TYPE_MESSAGE type_name: ".buf.test.D" json_name: "d" - source_code_info: {} + source_code_info.location: + - span: [0, 0, 11, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [3, 2], span: [5, 0, 17] } + - { path: [4, 0], span: [7, 0, 11, 1] } + - { path: [4, 0, 1], span: [7, 8, 10] } + - { path: [4, 0, 2, 0, 6], span: [8, 2, 3] } + - { path: [4, 0, 2, 0], span: [8, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [8, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [8, 8, 9] } + - { path: [4, 0, 2, 1, 6], span: [9, 2, 3] } + - { path: [4, 0, 2, 1], span: [9, 2, 10] } + - { path: [4, 0, 2, 1, 1], span: [9, 4, 5] } + - { path: [4, 0, 2, 1, 3], span: [9, 8, 9] } + - { path: [4, 0, 2, 2, 6], span: [10, 2, 3] } + - 
{ path: [4, 0, 2, 2], span: [10, 2, 10] } + - { path: [4, 0, 2, 2, 1], span: [10, 4, 5] } + - { path: [4, 0, 2, 2, 3], span: [10, 8, 9] } syntax: "proto3" - name: "a2.proto" package: "buf.test" dependency: ["b.proto", "c.proto", "d.proto"] message_type: [{ name: "A2" }] source_code_info: + location: + - span: [0, 0, 8, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [3, 2], span: [5, 0, 17] } + - { path: [4, 0], span: [7, 0, 8, 1] } + - { path: [4, 0, 1], span: [7, 8, 10] } (buf.descriptor.v1.buf_source_code_info_extension): unused_dependency: [0, 1, 2] syntax: "proto3" @@ -76,6 +129,23 @@ file: type_name: ".buf.test.D" json_name: "d" source_code_info: + location: + - span: [0, 0, 10, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [3, 2], span: [5, 0, 17] } + - { path: [4, 0], span: [7, 0, 10, 1] } + - { path: [4, 0, 1], span: [7, 8, 10] } + - { path: [4, 0, 2, 0, 6], span: [8, 2, 3] } + - { path: [4, 0, 2, 0], span: [8, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [8, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [8, 8, 9] } + - { path: [4, 0, 2, 1, 6], span: [9, 2, 3] } + - { path: [4, 0, 2, 1], span: [9, 2, 10] } + - { path: [4, 0, 2, 1, 1], span: [9, 4, 5] } + - { path: [4, 0, 2, 1, 3], span: [9, 8, 9] } (buf.descriptor.v1.buf_source_code_info_extension): unused_dependency: [1] syntax: "proto3" @@ -104,6 +174,27 @@ file: type_name: ".buf.test.D" json_name: "d" source_code_info: + location: + - span: [0, 0, 11, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [3, 2], span: [5, 0, 17] } + - { path: [4, 0], span: [7, 0, 11, 1] } + - { path: [4, 0, 1], span: [7, 8, 10] } + - { path: [4, 0, 2, 0, 6], span: [8, 2, 3] } + - 
{ path: [4, 0, 2, 0], span: [8, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [8, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [8, 8, 9] } + - { path: [4, 0, 2, 1, 6], span: [9, 2, 3] } + - { path: [4, 0, 2, 1], span: [9, 2, 10] } + - { path: [4, 0, 2, 1, 1], span: [9, 4, 5] } + - { path: [4, 0, 2, 1, 3], span: [9, 8, 9] } + - { path: [4, 0, 2, 2, 6], span: [10, 2, 3] } + - { path: [4, 0, 2, 2], span: [10, 2, 10] } + - { path: [4, 0, 2, 2, 1], span: [10, 4, 5] } + - { path: [4, 0, 2, 2, 3], span: [10, 8, 9] } (buf.descriptor.v1.buf_source_code_info_extension): unused_dependency: [1] syntax: "proto3" @@ -131,7 +222,27 @@ file: type: TYPE_MESSAGE type_name: ".buf.test.D" json_name: "d" - source_code_info: {} + source_code_info.location: + - span: [0, 0, 11, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [3, 2], span: [5, 0, 17] } + - { path: [4, 0], span: [7, 0, 11, 1] } + - { path: [4, 0, 1], span: [7, 8, 10] } + - { path: [4, 0, 2, 0, 6], span: [8, 2, 3] } + - { path: [4, 0, 2, 0], span: [8, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [8, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [8, 8, 9] } + - { path: [4, 0, 2, 1, 6], span: [9, 2, 3] } + - { path: [4, 0, 2, 1], span: [9, 2, 10] } + - { path: [4, 0, 2, 1, 1], span: [9, 4, 5] } + - { path: [4, 0, 2, 1, 3], span: [9, 8, 9] } + - { path: [4, 0, 2, 2, 6], span: [10, 2, 3] } + - { path: [4, 0, 2, 2], span: [10, 2, 10] } + - { path: [4, 0, 2, 2, 1], span: [10, 4, 5] } + - { path: [4, 0, 2, 2, 3], span: [10, 8, 9] } syntax: "proto3" - name: "e.proto" package: "buf.test" @@ -145,7 +256,17 @@ file: type: TYPE_MESSAGE type_name: ".buf.test.D" json_name: "d" - source_code_info: {} + source_code_info.location: + - span: [0, 0, 7, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [4, 0], span: [5, 0, 7, 1] } + - { path: [4, 0, 1], span: 
[5, 8, 9] } + - { path: [4, 0, 2, 0, 6], span: [6, 2, 3] } + - { path: [4, 0, 2, 0], span: [6, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [6, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [6, 8, 9] } syntax: "proto3" - name: "a6.proto" package: "buf.test" @@ -165,5 +286,20 @@ file: type: TYPE_MESSAGE type_name: ".buf.test.E" json_name: "e" - source_code_info: {} + source_code_info.location: + - span: [0, 0, 9, 1] + - { path: [12], span: [0, 0, 18] } + - { path: [2], span: [1, 0, 17] } + - { path: [3, 0], span: [3, 0, 17] } + - { path: [3, 1], span: [4, 0, 17] } + - { path: [4, 0], span: [6, 0, 9, 1] } + - { path: [4, 0, 1], span: [6, 8, 10] } + - { path: [4, 0, 2, 0, 6], span: [7, 2, 3] } + - { path: [4, 0, 2, 0], span: [7, 2, 10] } + - { path: [4, 0, 2, 0, 1], span: [7, 4, 5] } + - { path: [4, 0, 2, 0, 3], span: [7, 8, 9] } + - { path: [4, 0, 2, 1, 6], span: [8, 2, 3] } + - { path: [4, 0, 2, 1], span: [8, 2, 10] } + - { path: [4, 0, 2, 1, 1], span: [8, 4, 5] } + - { path: [4, 0, 2, 1, 3], span: [8, 8, 9] } syntax: "proto3" diff --git a/experimental/ir/testdata/visibility.proto.yaml.fds.yaml b/experimental/ir/testdata/visibility.proto.yaml.fds.yaml index 8a55f9c2b..26dcdc47e 100644 --- a/experimental/ir/testdata/visibility.proto.yaml.fds.yaml +++ b/experimental/ir/testdata/visibility.proto.yaml.fds.yaml @@ -49,7 +49,52 @@ file: type_name: ".test.strict.N.L" json_name: "nl" options.features.default_symbol_visibility: STRICT - source_code_info: {} + source_code_info.location: + - span: [0, 0, 21, 1] + - { path: [12], span: [0, 0, 17] } + - { path: [2], span: [1, 0, 20] } + - { path: [8], span: [3, 0, 51] } + - { path: [8, 50, 8], span: [3, 0, 51] } + - { path: [4, 0], span: [5, 0, 9, 1] } + - { path: [4, 0, 1], span: [5, 8, 9] } + - { path: [4, 0, 3, 0], span: [6, 2, 14] } + - { path: [4, 0, 3, 0, 1], span: [6, 10, 11] } + - { path: [4, 0, 3, 1], span: [7, 2, 21] } + - { path: [4, 0, 3, 1, 1], span: [7, 17, 18] } + - { path: [4, 0, 3, 2], span: [8, 2, 20] } + - { path: 
[4, 0, 3, 2, 1], span: [8, 16, 17] } + - { path: [4, 1], span: [10, 0, 12] } + - { path: [4, 1, 1], span: [10, 8, 9] } + - { path: [4, 2], span: [11, 0, 19] } + - { path: [4, 2, 1], span: [11, 15, 16] } + - { path: [4, 3], span: [12, 0, 18] } + - { path: [4, 3, 1], span: [12, 14, 15] } + - { path: [4, 4], span: [14, 0, 21, 1] } + - { path: [4, 4, 1], span: [14, 8, 9] } + - { path: [4, 4, 2, 0, 6], span: [15, 2, 3] } + - { path: [4, 4, 2, 0], span: [15, 2, 10] } + - { path: [4, 4, 2, 0, 1], span: [15, 4, 5] } + - { path: [4, 4, 2, 0, 3], span: [15, 8, 9] } + - { path: [4, 4, 2, 1, 6], span: [16, 2, 3] } + - { path: [4, 4, 2, 1], span: [16, 2, 10] } + - { path: [4, 4, 2, 1, 1], span: [16, 4, 5] } + - { path: [4, 4, 2, 1, 3], span: [16, 8, 9] } + - { path: [4, 4, 2, 2, 6], span: [17, 2, 3] } + - { path: [4, 4, 2, 2], span: [17, 2, 10] } + - { path: [4, 4, 2, 2, 1], span: [17, 4, 5] } + - { path: [4, 4, 2, 2, 3], span: [17, 8, 9] } + - { path: [4, 4, 2, 3, 6], span: [18, 2, 5] } + - { path: [4, 4, 2, 3], span: [18, 2, 13] } + - { path: [4, 4, 2, 3, 1], span: [18, 6, 8] } + - { path: [4, 4, 2, 3, 3], span: [18, 11, 12] } + - { path: [4, 4, 2, 4, 6], span: [19, 2, 5] } + - { path: [4, 4, 2, 4], span: [19, 2, 13] } + - { path: [4, 4, 2, 4, 1], span: [19, 6, 8] } + - { path: [4, 4, 2, 4, 3], span: [19, 11, 12] } + - { path: [4, 4, 2, 5, 6], span: [20, 2, 5] } + - { path: [4, 4, 2, 5], span: [20, 2, 13] } + - { path: [4, 4, 2, 5, 1], span: [20, 6, 8] } + - { path: [4, 4, 2, 5, 3], span: [20, 11, 12] } syntax: "editions" edition: EDITION_2024 - name: "top_level.proto" @@ -102,7 +147,52 @@ file: type_name: ".test.top_level.N.L" json_name: "nl" options.features.default_symbol_visibility: EXPORT_TOP_LEVEL - source_code_info: {} + source_code_info.location: + - span: [0, 0, 21, 1] + - { path: [12], span: [0, 0, 17] } + - { path: [2], span: [1, 0, 23] } + - { path: [8], span: [3, 0, 61] } + - { path: [8, 50, 8], span: [3, 0, 61] } + - { path: [4, 0], span: [5, 0, 9, 1] } + - { 
path: [4, 0, 1], span: [5, 8, 9] } + - { path: [4, 0, 3, 0], span: [6, 2, 14] } + - { path: [4, 0, 3, 0, 1], span: [6, 10, 11] } + - { path: [4, 0, 3, 1], span: [7, 2, 21] } + - { path: [4, 0, 3, 1, 1], span: [7, 17, 18] } + - { path: [4, 0, 3, 2], span: [8, 2, 20] } + - { path: [4, 0, 3, 2, 1], span: [8, 16, 17] } + - { path: [4, 1], span: [10, 0, 12] } + - { path: [4, 1, 1], span: [10, 8, 9] } + - { path: [4, 2], span: [11, 0, 19] } + - { path: [4, 2, 1], span: [11, 15, 16] } + - { path: [4, 3], span: [12, 0, 18] } + - { path: [4, 3, 1], span: [12, 14, 15] } + - { path: [4, 4], span: [14, 0, 21, 1] } + - { path: [4, 4, 1], span: [14, 8, 9] } + - { path: [4, 4, 2, 0, 6], span: [15, 2, 3] } + - { path: [4, 4, 2, 0], span: [15, 2, 10] } + - { path: [4, 4, 2, 0, 1], span: [15, 4, 5] } + - { path: [4, 4, 2, 0, 3], span: [15, 8, 9] } + - { path: [4, 4, 2, 1, 6], span: [16, 2, 3] } + - { path: [4, 4, 2, 1], span: [16, 2, 10] } + - { path: [4, 4, 2, 1, 1], span: [16, 4, 5] } + - { path: [4, 4, 2, 1, 3], span: [16, 8, 9] } + - { path: [4, 4, 2, 2, 6], span: [17, 2, 3] } + - { path: [4, 4, 2, 2], span: [17, 2, 10] } + - { path: [4, 4, 2, 2, 1], span: [17, 4, 5] } + - { path: [4, 4, 2, 2, 3], span: [17, 8, 9] } + - { path: [4, 4, 2, 3, 6], span: [18, 2, 5] } + - { path: [4, 4, 2, 3], span: [18, 2, 13] } + - { path: [4, 4, 2, 3, 1], span: [18, 6, 8] } + - { path: [4, 4, 2, 3, 3], span: [18, 11, 12] } + - { path: [4, 4, 2, 4, 6], span: [19, 2, 5] } + - { path: [4, 4, 2, 4], span: [19, 2, 13] } + - { path: [4, 4, 2, 4, 1], span: [19, 6, 8] } + - { path: [4, 4, 2, 4, 3], span: [19, 11, 12] } + - { path: [4, 4, 2, 5, 6], span: [20, 2, 5] } + - { path: [4, 4, 2, 5], span: [20, 2, 13] } + - { path: [4, 4, 2, 5, 1], span: [20, 6, 8] } + - { path: [4, 4, 2, 5, 3], span: [20, 11, 12] } syntax: "editions" edition: EDITION_2024 - name: "local.proto" @@ -155,7 +245,52 @@ file: type_name: ".test.local.N.L" json_name: "nl" options.features.default_symbol_visibility: LOCAL_ALL - 
source_code_info: {} + source_code_info.location: + - span: [0, 0, 21, 1] + - { path: [12], span: [0, 0, 17] } + - { path: [2], span: [1, 0, 19] } + - { path: [8], span: [3, 0, 54] } + - { path: [8, 50, 8], span: [3, 0, 54] } + - { path: [4, 0], span: [5, 0, 9, 1] } + - { path: [4, 0, 1], span: [5, 8, 9] } + - { path: [4, 0, 3, 0], span: [6, 2, 14] } + - { path: [4, 0, 3, 0, 1], span: [6, 10, 11] } + - { path: [4, 0, 3, 1], span: [7, 2, 21] } + - { path: [4, 0, 3, 1, 1], span: [7, 17, 18] } + - { path: [4, 0, 3, 2], span: [8, 2, 20] } + - { path: [4, 0, 3, 2, 1], span: [8, 16, 17] } + - { path: [4, 1], span: [10, 0, 12] } + - { path: [4, 1, 1], span: [10, 8, 9] } + - { path: [4, 2], span: [11, 0, 19] } + - { path: [4, 2, 1], span: [11, 15, 16] } + - { path: [4, 3], span: [12, 0, 18] } + - { path: [4, 3, 1], span: [12, 14, 15] } + - { path: [4, 4], span: [14, 0, 21, 1] } + - { path: [4, 4, 1], span: [14, 8, 9] } + - { path: [4, 4, 2, 0, 6], span: [15, 2, 3] } + - { path: [4, 4, 2, 0], span: [15, 2, 10] } + - { path: [4, 4, 2, 0, 1], span: [15, 4, 5] } + - { path: [4, 4, 2, 0, 3], span: [15, 8, 9] } + - { path: [4, 4, 2, 1, 6], span: [16, 2, 3] } + - { path: [4, 4, 2, 1], span: [16, 2, 10] } + - { path: [4, 4, 2, 1, 1], span: [16, 4, 5] } + - { path: [4, 4, 2, 1, 3], span: [16, 8, 9] } + - { path: [4, 4, 2, 2, 6], span: [17, 2, 3] } + - { path: [4, 4, 2, 2], span: [17, 2, 10] } + - { path: [4, 4, 2, 2, 1], span: [17, 4, 5] } + - { path: [4, 4, 2, 2, 3], span: [17, 8, 9] } + - { path: [4, 4, 2, 3, 6], span: [18, 2, 5] } + - { path: [4, 4, 2, 3], span: [18, 2, 13] } + - { path: [4, 4, 2, 3, 1], span: [18, 6, 8] } + - { path: [4, 4, 2, 3, 3], span: [18, 11, 12] } + - { path: [4, 4, 2, 4, 6], span: [19, 2, 5] } + - { path: [4, 4, 2, 4], span: [19, 2, 13] } + - { path: [4, 4, 2, 4, 1], span: [19, 6, 8] } + - { path: [4, 4, 2, 4, 3], span: [19, 11, 12] } + - { path: [4, 4, 2, 5, 6], span: [20, 2, 5] } + - { path: [4, 4, 2, 5], span: [20, 2, 13] } + - { path: [4, 4, 2, 5, 
1], span: [20, 6, 8] } + - { path: [4, 4, 2, 5, 3], span: [20, 11, 12] } syntax: "editions" edition: EDITION_2024 - name: "export.proto" @@ -208,7 +343,52 @@ file: type_name: ".test.export.N.L" json_name: "nl" options.features.default_symbol_visibility: EXPORT_ALL - source_code_info: {} + source_code_info.location: + - span: [0, 0, 21, 1] + - { path: [12], span: [0, 0, 17] } + - { path: [2], span: [1, 0, 20] } + - { path: [8], span: [3, 0, 55] } + - { path: [8, 50, 8], span: [3, 0, 55] } + - { path: [4, 0], span: [5, 0, 9, 1] } + - { path: [4, 0, 1], span: [5, 8, 9] } + - { path: [4, 0, 3, 0], span: [6, 2, 14] } + - { path: [4, 0, 3, 0, 1], span: [6, 10, 11] } + - { path: [4, 0, 3, 1], span: [7, 2, 21] } + - { path: [4, 0, 3, 1, 1], span: [7, 17, 18] } + - { path: [4, 0, 3, 2], span: [8, 2, 20] } + - { path: [4, 0, 3, 2, 1], span: [8, 16, 17] } + - { path: [4, 1], span: [10, 0, 12] } + - { path: [4, 1, 1], span: [10, 8, 9] } + - { path: [4, 2], span: [11, 0, 19] } + - { path: [4, 2, 1], span: [11, 15, 16] } + - { path: [4, 3], span: [12, 0, 18] } + - { path: [4, 3, 1], span: [12, 14, 15] } + - { path: [4, 4], span: [14, 0, 21, 1] } + - { path: [4, 4, 1], span: [14, 8, 9] } + - { path: [4, 4, 2, 0, 6], span: [15, 2, 3] } + - { path: [4, 4, 2, 0], span: [15, 2, 10] } + - { path: [4, 4, 2, 0, 1], span: [15, 4, 5] } + - { path: [4, 4, 2, 0, 3], span: [15, 8, 9] } + - { path: [4, 4, 2, 1, 6], span: [16, 2, 3] } + - { path: [4, 4, 2, 1], span: [16, 2, 10] } + - { path: [4, 4, 2, 1, 1], span: [16, 4, 5] } + - { path: [4, 4, 2, 1, 3], span: [16, 8, 9] } + - { path: [4, 4, 2, 2, 6], span: [17, 2, 3] } + - { path: [4, 4, 2, 2], span: [17, 2, 10] } + - { path: [4, 4, 2, 2, 1], span: [17, 4, 5] } + - { path: [4, 4, 2, 2, 3], span: [17, 8, 9] } + - { path: [4, 4, 2, 3, 6], span: [18, 2, 5] } + - { path: [4, 4, 2, 3], span: [18, 2, 13] } + - { path: [4, 4, 2, 3, 1], span: [18, 6, 8] } + - { path: [4, 4, 2, 3, 3], span: [18, 11, 12] } + - { path: [4, 4, 2, 4, 6], span: [19, 2, 5] 
} + - { path: [4, 4, 2, 4], span: [19, 2, 13] } + - { path: [4, 4, 2, 4, 1], span: [19, 6, 8] } + - { path: [4, 4, 2, 4, 3], span: [19, 11, 12] } + - { path: [4, 4, 2, 5, 6], span: [20, 2, 5] } + - { path: [4, 4, 2, 5], span: [20, 2, 13] } + - { path: [4, 4, 2, 5, 1], span: [20, 6, 8] } + - { path: [4, 4, 2, 5, 3], span: [20, 11, 12] } syntax: "editions" edition: EDITION_2024 - name: "main.proto" @@ -367,6 +547,117 @@ file: type: TYPE_MESSAGE type_name: ".test.strict.N.L" json_name: "nl" - source_code_info: {} + source_code_info.location: + - span: [0, 0, 42, 1] + - { path: [12], span: [0, 0, 17] } + - { path: [2], span: [1, 0, 13] } + - { path: [3, 0], span: [3, 0, 22] } + - { path: [3, 1], span: [4, 0, 21] } + - { path: [3, 2], span: [5, 0, 25] } + - { path: [3, 3], span: [6, 0, 22] } + - { path: [4, 0], span: [8, 0, 15, 1] } + - { path: [4, 0, 1], span: [8, 8, 14] } + - { path: [4, 0, 2, 0, 6], span: [9, 2, 10] } + - { path: [4, 0, 2, 0], span: [9, 2, 17] } + - { path: [4, 0, 2, 0, 1], span: [9, 11, 12] } + - { path: [4, 0, 2, 0, 3], span: [9, 15, 16] } + - { path: [4, 0, 2, 1, 6], span: [10, 2, 10] } + - { path: [4, 0, 2, 1], span: [10, 2, 17] } + - { path: [4, 0, 2, 1, 1], span: [10, 11, 12] } + - { path: [4, 0, 2, 1, 3], span: [10, 15, 16] } + - { path: [4, 0, 2, 2, 6], span: [11, 2, 10] } + - { path: [4, 0, 2, 2], span: [11, 2, 17] } + - { path: [4, 0, 2, 2, 1], span: [11, 11, 12] } + - { path: [4, 0, 2, 2, 3], span: [11, 15, 16] } + - { path: [4, 0, 2, 3, 6], span: [12, 2, 12] } + - { path: [4, 0, 2, 3], span: [12, 2, 20] } + - { path: [4, 0, 2, 3, 1], span: [12, 13, 15] } + - { path: [4, 0, 2, 3, 3], span: [12, 18, 19] } + - { path: [4, 0, 2, 4, 6], span: [13, 2, 12] } + - { path: [4, 0, 2, 4], span: [13, 2, 20] } + - { path: [4, 0, 2, 4, 1], span: [13, 13, 15] } + - { path: [4, 0, 2, 4, 3], span: [13, 18, 19] } + - { path: [4, 0, 2, 5, 6], span: [14, 2, 12] } + - { path: [4, 0, 2, 5], span: [14, 2, 20] } + - { path: [4, 0, 2, 5, 1], span: [14, 13, 15] } + 
- { path: [4, 0, 2, 5, 3], span: [14, 18, 19] } + - { path: [4, 1], span: [17, 0, 24, 1] } + - { path: [4, 1, 1], span: [17, 8, 13] } + - { path: [4, 1, 2, 0, 6], span: [18, 2, 9] } + - { path: [4, 1, 2, 0], span: [18, 2, 16] } + - { path: [4, 1, 2, 0, 1], span: [18, 10, 11] } + - { path: [4, 1, 2, 0, 3], span: [18, 14, 15] } + - { path: [4, 1, 2, 1, 6], span: [19, 2, 9] } + - { path: [4, 1, 2, 1], span: [19, 2, 16] } + - { path: [4, 1, 2, 1, 1], span: [19, 10, 11] } + - { path: [4, 1, 2, 1, 3], span: [19, 14, 15] } + - { path: [4, 1, 2, 2, 6], span: [20, 2, 9] } + - { path: [4, 1, 2, 2], span: [20, 2, 16] } + - { path: [4, 1, 2, 2, 1], span: [20, 10, 11] } + - { path: [4, 1, 2, 2, 3], span: [20, 14, 15] } + - { path: [4, 1, 2, 3, 6], span: [21, 2, 11] } + - { path: [4, 1, 2, 3], span: [21, 2, 19] } + - { path: [4, 1, 2, 3, 1], span: [21, 12, 14] } + - { path: [4, 1, 2, 3, 3], span: [21, 17, 18] } + - { path: [4, 1, 2, 4, 6], span: [22, 2, 11] } + - { path: [4, 1, 2, 4], span: [22, 2, 19] } + - { path: [4, 1, 2, 4, 1], span: [22, 12, 14] } + - { path: [4, 1, 2, 4, 3], span: [22, 17, 18] } + - { path: [4, 1, 2, 5, 6], span: [23, 2, 11] } + - { path: [4, 1, 2, 5], span: [23, 2, 19] } + - { path: [4, 1, 2, 5, 1], span: [23, 12, 14] } + - { path: [4, 1, 2, 5, 3], span: [23, 17, 18] } + - { path: [4, 2], span: [26, 0, 33, 1] } + - { path: [4, 2, 1], span: [26, 8, 16] } + - { path: [4, 2, 2, 0, 6], span: [27, 2, 13] } + - { path: [4, 2, 2, 0], span: [27, 2, 20] } + - { path: [4, 2, 2, 0, 1], span: [27, 14, 15] } + - { path: [4, 2, 2, 0, 3], span: [27, 18, 19] } + - { path: [4, 2, 2, 1, 6], span: [28, 2, 13] } + - { path: [4, 2, 2, 1], span: [28, 2, 20] } + - { path: [4, 2, 2, 1, 1], span: [28, 14, 15] } + - { path: [4, 2, 2, 1, 3], span: [28, 18, 19] } + - { path: [4, 2, 2, 2, 6], span: [29, 2, 13] } + - { path: [4, 2, 2, 2], span: [29, 2, 20] } + - { path: [4, 2, 2, 2, 1], span: [29, 14, 15] } + - { path: [4, 2, 2, 2, 3], span: [29, 18, 19] } + - { path: [4, 2, 2, 3, 
6], span: [30, 2, 15] } + - { path: [4, 2, 2, 3], span: [30, 2, 23] } + - { path: [4, 2, 2, 3, 1], span: [30, 16, 18] } + - { path: [4, 2, 2, 3, 3], span: [30, 21, 22] } + - { path: [4, 2, 2, 4, 6], span: [31, 2, 15] } + - { path: [4, 2, 2, 4], span: [31, 2, 23] } + - { path: [4, 2, 2, 4, 1], span: [31, 16, 18] } + - { path: [4, 2, 2, 4, 3], span: [31, 21, 22] } + - { path: [4, 2, 2, 5, 6], span: [32, 2, 15] } + - { path: [4, 2, 2, 5], span: [32, 2, 23] } + - { path: [4, 2, 2, 5, 1], span: [32, 16, 18] } + - { path: [4, 2, 2, 5, 3], span: [32, 21, 22] } + - { path: [4, 3], span: [35, 0, 42, 1] } + - { path: [4, 3, 1], span: [35, 8, 14] } + - { path: [4, 3, 2, 0, 6], span: [36, 2, 10] } + - { path: [4, 3, 2, 0], span: [36, 2, 17] } + - { path: [4, 3, 2, 0, 1], span: [36, 11, 12] } + - { path: [4, 3, 2, 0, 3], span: [36, 15, 16] } + - { path: [4, 3, 2, 1, 6], span: [37, 2, 10] } + - { path: [4, 3, 2, 1], span: [37, 2, 17] } + - { path: [4, 3, 2, 1, 1], span: [37, 11, 12] } + - { path: [4, 3, 2, 1, 3], span: [37, 15, 16] } + - { path: [4, 3, 2, 2, 6], span: [38, 2, 10] } + - { path: [4, 3, 2, 2], span: [38, 2, 17] } + - { path: [4, 3, 2, 2, 1], span: [38, 11, 12] } + - { path: [4, 3, 2, 2, 3], span: [38, 15, 16] } + - { path: [4, 3, 2, 3, 6], span: [39, 2, 12] } + - { path: [4, 3, 2, 3], span: [39, 2, 20] } + - { path: [4, 3, 2, 3, 1], span: [39, 13, 15] } + - { path: [4, 3, 2, 3, 3], span: [39, 18, 19] } + - { path: [4, 3, 2, 4, 6], span: [40, 2, 12] } + - { path: [4, 3, 2, 4], span: [40, 2, 20] } + - { path: [4, 3, 2, 4, 1], span: [40, 13, 15] } + - { path: [4, 3, 2, 4, 3], span: [40, 18, 19] } + - { path: [4, 3, 2, 5, 6], span: [41, 2, 12] } + - { path: [4, 3, 2, 5], span: [41, 2, 20] } + - { path: [4, 3, 2, 5, 1], span: [41, 13, 15] } + - { path: [4, 3, 2, 5, 3], span: [41, 18, 19] } syntax: "editions" edition: EDITION_2024 diff --git a/experimental/source/span.go b/experimental/source/span.go index d308ac79d..8f0808631 100644 --- a/experimental/source/span.go +++ 
b/experimental/source/span.go @@ -227,6 +227,20 @@ func GetSpan(s Spanner) Span { return s.Span() } +// Between is a helper function that returns a [Span] for the space between spans a and b, +// inclusive. If a and b do not have the same [File] or if the spans overlap, then this +// returns a zero span. +func Between(a, b Span) Span { + if a.File != b.File || b.Start < a.End { + return Span{} + } + return Span{ + File: a.File, + Start: a.Start, + End: b.End, + } +} + // idxToByteOffset converts a byte index into s into a byte offset. // // If i is negative, this produces the index of the -ith byte from the end of diff --git a/experimental/token/cursor.go b/experimental/token/cursor.go index 5213d5048..73e6ba506 100644 --- a/experimental/token/cursor.go +++ b/experimental/token/cursor.go @@ -17,6 +17,7 @@ package token import ( "fmt" "iter" + "strings" "github.com/bufbuild/protocompile/experimental/id" "github.com/bufbuild/protocompile/experimental/source" @@ -111,7 +112,7 @@ func (c *Cursor) Mark() CursorMark { // Panics if mark was not created using this cursor's Mark method. func (c *Cursor) Rewind(mark CursorMark) { if c != mark.owner { - panic("protocompile/ast: rewound cursor using the wrong cursor's mark") + panic("protocompile/token: rewound cursor using the wrong cursor's mark") } c.idx = mark.idx c.isBackwards = mark.isBackwards @@ -312,3 +313,29 @@ func (c *Cursor) SeekToEnd() (Token, source.Span) { tok := id.Wrap(c.Context(), ID(c.idx+1)) return tok, stream.Span(tok.offsets()) } + +// NewLinesBetween counts the number of \n characters between the end of [token.Token] a +// and the start of b, up to the limit. +// +// The final rune of a is included in this count, since comments may end in a \n rune. 
+func (c *Cursor) NewLinesBetween(a, b Token, limit int) int { + end := a.LeafSpan().End + if end != 0 { + // Account for the final rune of a + end-- + } + + start := b.LeafSpan().Start + between := c.Context().Text()[end:start] + + var total int + for total < limit { + var found bool + _, between, found = strings.Cut(between, "\n") + if !found { + break + } + total++ + } + return total +} diff --git a/internal/ext/slicesx/dedup.go b/internal/ext/slicesx/dedup.go index 15f36ee64..e7807bceb 100644 --- a/internal/ext/slicesx/dedup.go +++ b/internal/ext/slicesx/dedup.go @@ -26,16 +26,28 @@ func DedupKey[S ~[]E, E any, K comparable]( key func(E) K, choose func([]E) E, ) S { + return dedup(s, func(a, b E) bool { return key(a) == key(b) }, choose) +} + +// DedupFunc deduplicates consecutive elements in a slice based on the equal function. If +// equal returns true, then two elements are considered duplicates, and we always pick the +// first element to keep. +func DedupFunc[S ~[]E, E any](s S, equal func(E, E) bool) S { + return dedup(s, equal, func(e []E) E { return e[0] }) +} + +func dedup[S ~[]E, E any](s S, equal func(E, E) bool, choose func([]E) E) S { if len(s) == 0 { return s } i := 0 // Index to write the next value at. j := 0 // Index of prev. - prev := key(s[0]) + + prev := s[i] for k := 1; k < len(s); k++ { - next := key(s[k]) - if prev == next { + next := s[k] + if equal(prev, next) { continue } diff --git a/internal/tags.go b/internal/tags.go index b605e68a5..48a56f555 100644 --- a/internal/tags.go +++ b/internal/tags.go @@ -76,6 +76,9 @@ const ( // FileWeakDependencyTag is the tag number of the weak dependency element // in a file descriptor proto. FileWeakDependencyTag = 11 + // FileOptionDependencyTag is the tag number of the option dependency element + // in a file descriptor proto. + FileOptionDependencyTag = 15 // FileSyntaxTag is the tag number of the syntax element in a file // descriptor proto. 
FileSyntaxTag = 12 diff --git a/internal/testing/dualcompiler/new_adapter.go b/internal/testing/dualcompiler/new_adapter.go index 241441e7c..afc4cc605 100644 --- a/internal/testing/dualcompiler/new_adapter.go +++ b/internal/testing/dualcompiler/new_adapter.go @@ -24,6 +24,7 @@ import ( "google.golang.org/protobuf/reflect/protoregistry" "google.golang.org/protobuf/types/descriptorpb" + "github.com/bufbuild/protocompile/experimental/fdp" "github.com/bufbuild/protocompile/experimental/incremental" "github.com/bufbuild/protocompile/experimental/incremental/queries" "github.com/bufbuild/protocompile/experimental/ir" @@ -33,9 +34,10 @@ import ( // newCompilerAdapter wraps the experimental incremental compiler. type newCompilerAdapter struct { - executor *incremental.Executor - opener source.Opener - session *ir.Session + executor *incremental.Executor + opener source.Opener + session *ir.Session + includeSourceCodeInfo bool } // NewNewCompiler creates a new CompilerInterface wrapping the experimental compiler. @@ -58,10 +60,16 @@ func NewNewCompiler(opts ...CompilerOption) CompilerInterface { opener = source.WKTs() } + var includeSourceCodeInfo bool + if config.sourceInfoMode != 0 { + includeSourceCodeInfo = true + } + return &newCompilerAdapter{ - executor: incremental.New(), - opener: opener, - session: &ir.Session{}, + executor: incremental.New(), + opener: opener, + session: &ir.Session{}, + includeSourceCodeInfo: includeSourceCodeInfo, } } @@ -105,13 +113,15 @@ func (a *newCompilerAdapter) Compile(ctx context.Context, files ...string) (Comp } return &newCompilationResult{ - files: irFiles, + files: irFiles, + includeSourceCodeInfo: a.includeSourceCodeInfo, }, nil } // newCompilationResult wraps IR files. type newCompilationResult struct { - files []*ir.File + files []*ir.File + includeSourceCodeInfo bool } // Files implements CompilationResult. 
@@ -119,7 +129,8 @@ func (r *newCompilationResult) Files() []CompiledFile { result := make([]CompiledFile, len(r.files)) for i, file := range r.files { result[i] = &newCompiledFile{ - file: file, + file: file, + includeSourceCodeInfo: r.includeSourceCodeInfo, } } return result @@ -127,7 +138,8 @@ func (r *newCompilationResult) Files() []CompiledFile { // newCompiledFile wraps an ir.File. type newCompiledFile struct { - file *ir.File + file *ir.File + includeSourceCodeInfo bool } // Path implements CompiledFile. @@ -149,7 +161,10 @@ func (f *newCompiledFile) FileDescriptor() (protoreflect.FileDescriptor, error) // FileDescriptorProto implements CompiledFile. func (f *newCompiledFile) FileDescriptorProto() (*descriptorpb.FileDescriptorProto, error) { - data, err := ir.DescriptorProtoBytes(f.file) + data, err := fdp.DescriptorProtoBytes( + f.file, + fdp.IncludeSourceCodeInfo(f.includeSourceCodeInfo), + ) if err != nil { return nil, err } diff --git a/sourceinfo/source_code_info.go b/sourceinfo/source_code_info.go index 0bb36610b..320aaa5d5 100644 --- a/sourceinfo/source_code_info.go +++ b/sourceinfo/source_code_info.go @@ -149,7 +149,10 @@ func generateSourceInfoForFile(opts OptionIndex, sci *sourceCodeInfo, file *ast. 
sci.newLocWithComments(file.Syntax, append(path, internal.FileSyntaxTag)) } if file.Edition != nil { - sci.newLocWithComments(file.Edition, append(path, internal.FileEditionTag)) + // Despite editions having its own field, protoc behavior sets the path in source code + // info as [internal.FileSyntaxTag] and this is vaguely outlined in descriptor.proto + // https://github.com/protocolbuffers/protobuf/blob/22e1e6bd90aa8dc35f8cc28b5d7fc03858060f0b/src/google/protobuf/descriptor.proto#L137-L144 + sci.newLocWithComments(file.Edition, append(path, internal.FileSyntaxTag)) } var depIndex, pubDepIndex, weakDepIndex, optIndex, msgIndex, enumIndex, extendIndex, svcIndex int32 @@ -383,6 +386,16 @@ func generateSourceCodeInfoForMessage(opts OptionIndex, sci *sourceCodeInfo, n a reservedNameIndex++ } } + // For editions, reserved names are identifiers. + if len(child.Identifiers) > 0 { + resPath := path + resPath = append(resPath, internal.MessageReservedNamesTag) + sci.newLocWithComments(child, resPath) + for _, rn := range child.Identifiers { + sci.newLoc(rn, append(resPath, reservedNameIndex)) + reservedNameIndex++ + } + } if len(child.Ranges) > 0 { resPath := path resPath = append(resPath, internal.MessageReservedRangesTag)