Skip to content

Commit 528017d

Browse files
jsharpeclaude
andcommitted
Add tests and refactor buildInfo metadata for memory efficiency
Adds comprehensive tests for buildInfo metadata collection and refactors the implementation to address review comments on memory efficiency. Tests Added: - tests/core/buildinfo/metadata_test.go: Runtime test validating BuildInfo embedding for internal dependencies with multi-level dependency chains - tests/core/buildinfo/external_deps_test.go: Runtime test for external dependencies with real version metadata from package_info - Supporting test infrastructure including BUILD.bazel and README.md Refactoring Changes (addressing review comments from @fmeum): 1. buildinfo_aspect.bzl - Changed provider schema: - Split version_map into separate importpaths and metadata_providers depsets - Only traverse deps/embed attributes instead of all attributes (*) - Defer version matching to execution time to avoid quadratic memory usage - Extract empty provider as shared constant (_EMPTY_BUILDINFO_METADATA) - Remove applicable_licenses fallback (only Bazel 6+ supported) 2. archive.bzl - Removed quadratic aggregation: - Removed _buildinfo_deps tuple collection from GoArchiveData - BuildInfo metadata now collected only via aspect, not in archives - Eliminates O(n²) memory growth with dependency depth 3. link.bzl - Deferred materialization: - Updated to receive buildinfo_metadata provider instead of pre-computed tuples - Materialize depsets only once at link time - Extract versions from package_metadata providers on-demand 4. binary.bzl & rules/binary.bzl - Simplified metadata passing: - Pass BuildInfoMetadata provider directly instead of creating version_map file - Removed intermediate file generation and tuple aggregation The refactoring maintains the same functionality while significantly reducing memory usage for large dependency graphs by deferring depset materialization until the final link step. Tests validate: - BuildInfo is properly embedded in binaries - Main package path and Go version are included - All transitive dependencies appear in dependency list - Internal packages show "(devel)" as version - External dependencies with package_metadata show real versions 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <[email protected]>
1 parent 348bf89 commit 528017d

File tree

17 files changed

+660
-252
lines changed

17 files changed

+660
-252
lines changed

go/private/actions/archive.bzl

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -167,28 +167,7 @@ def emit_archive(go, source = None, _recompile_suffix = "", recompile_internal_d
167167
is_external_pkg = is_external_pkg,
168168
)
169169

170-
# Collect buildinfo dependency metadata from transitive closure
171-
# Format: (importpath, version_string) tuples
172-
# version_string uses "v0.0.0" as a valid pseudo-version
173-
dep_buildinfo = []
174-
dep_buildinfo_set = {}
175-
176-
# Add direct dependencies' buildinfo
177-
for d in direct:
178-
# Add this dependency's own metadata
179-
dep_key = d.data.importpath
180-
if dep_key not in dep_buildinfo_set and d.data.importpath:
181-
# Use (devel) as sentinel - invalid version that will be replaced
182-
# with real version from PackageInfo or filtered out if internal
183-
version = "(devel)"
184-
dep_buildinfo.append((d.data.importpath, version))
185-
dep_buildinfo_set[dep_key] = None
186-
187-
# Add transitive dependencies
188-
for dep_path, dep_version in getattr(d.data, "_buildinfo_deps", ()):
189-
if dep_path not in dep_buildinfo_set:
190-
dep_buildinfo.append((dep_path, dep_version))
191-
dep_buildinfo_set[dep_path] = None
170+
# Buildinfo metadata is collected via aspect and stored separately
192171

193172
data = GoArchiveData(
194173
# TODO(#2578): reconsider the provider API. There's a lot of redundant
@@ -220,9 +199,6 @@ def emit_archive(go, source = None, _recompile_suffix = "", recompile_internal_d
220199
# Information on dependencies
221200
_dep_labels = tuple([d.data.label for d in direct]),
222201

223-
# BuildInfo dependency metadata for Go 1.18+ (deterministic)
224-
_buildinfo_deps = tuple(dep_buildinfo),
225-
226202
# Information needed by dependents
227203
file = out_lib,
228204
export_file = out_export,

go/private/actions/binary.bzl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def emit_binary(
3434
gc_linkopts = [],
3535
version_file = None,
3636
info_file = None,
37-
version_map = None,
37+
buildinfo_metadata = None,
3838
target_label = None,
3939
executable = None):
4040
"""See go/toolchains.rst#binary for full documentation."""
@@ -63,7 +63,7 @@ def emit_binary(
6363
gc_linkopts = gc_linkopts,
6464
version_file = version_file,
6565
info_file = info_file,
66-
version_map = version_map,
66+
buildinfo_metadata = buildinfo_metadata,
6767
target_label = target_label,
6868
)
6969
cgo_dynamic_deps = [

go/private/actions/link.bzl

Lines changed: 42 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def emit_link(
4545
gc_linkopts = [],
4646
version_file = None,
4747
info_file = None,
48-
version_map = None,
48+
buildinfo_metadata = None,
4949
target_label = None):
5050
"""See go/toolchains.rst#link for full documentation."""
5151

@@ -56,23 +56,55 @@ def emit_link(
5656

5757
# Generate buildinfo dependency file for Go 1.18+ buildInfo support
5858
buildinfo_file = None
59-
if hasattr(archive.data, "_buildinfo_deps") and archive.data._buildinfo_deps:
59+
if buildinfo_metadata:
6060
buildinfo_file = go.declare_file(go, path = executable.basename + ".buildinfo.txt")
61-
buildinfo_content = ""
61+
62+
# Materialize depsets at link time
63+
importpaths = buildinfo_metadata.importpaths.to_list()
64+
metadata_targets = buildinfo_metadata.metadata_providers.to_list()
65+
66+
# Build version map from metadata providers
67+
# Sort targets by label for deterministic version resolution
68+
# This ensures reproducible builds and consistent action cache keys
69+
sorted_targets = sorted(
70+
metadata_targets,
71+
key = lambda t: str(t.label) if hasattr(t, "label") else str(t),
72+
)
73+
74+
version_map = {}
75+
for target in sorted_targets:
76+
# Extract version info from PackageInfo provider
77+
if hasattr(target, "package_info"):
78+
# Handle both single object and list of package_info
79+
package_infos = target.package_info if type(target.package_info) == type([]) else [target.package_info]
80+
for info in package_infos:
81+
if hasattr(info, "module") and hasattr(info, "version"):
82+
module = info.module
83+
version = info.version
84+
# Use first version (by sorted label order) if duplicates exist
85+
# This makes conflicts deterministic and debuggable
86+
if module not in version_map:
87+
version_map[module] = version
88+
89+
# Build buildinfo content
90+
content_lines = []
6291

6392
# Add main package path
6493
if archive.data.importpath:
65-
buildinfo_content += "path\t{}\n".format(archive.data.importpath)
94+
content_lines.append("path\t{}".format(archive.data.importpath))
6695

67-
# Add dependencies in sorted order for determinism
68-
deps_list = sorted(archive.data._buildinfo_deps, key = lambda x: x[0])
69-
for dep_path, dep_version in deps_list:
70-
# Format: dep\t<importpath>\t<version>
71-
buildinfo_content += "dep\t{}\t{}\n".format(dep_path, dep_version)
96+
# Add dependencies with versions
97+
# Sort importpaths for deterministic output, which is required for:
98+
# 1. Bazel action caching - identical inputs must produce identical outputs
99+
# 2. Reproducible builds across different machines
100+
# 3. Easier debugging and testing with predictable output order
101+
for importpath in sorted(importpaths):
102+
version = version_map.get(importpath, "(devel)")
103+
content_lines.append("dep\t{}\t{}".format(importpath, version))
72104

73105
go.actions.write(
74106
output = buildinfo_file,
75-
content = buildinfo_content,
107+
content = "\n".join(content_lines) + "\n" if content_lines else "",
76108
)
77109

78110
# Exclude -lstdc++ from link options. We don't want to link against it
@@ -194,8 +226,6 @@ def emit_link(
194226
# Pass buildinfo file to builder if available
195227
if buildinfo_file:
196228
builder_args.add("-buildinfo", buildinfo_file)
197-
if version_map:
198-
builder_args.add("-versionmap", version_map)
199229
if target_label:
200230
builder_args.add("-bazeltarget", target_label)
201231

@@ -211,8 +241,6 @@ def emit_link(
211241
inputs_direct = stamp_inputs + [go.sdk.package_list]
212242
if buildinfo_file:
213243
inputs_direct.append(buildinfo_file)
214-
if version_map:
215-
inputs_direct.append(version_map)
216244
if go.coverage_enabled and go.coverdata:
217245
inputs_direct.append(go.coverdata.data.file)
218246
inputs_transitive = [

go/private/aspects/buildinfo_aspect.bzl

Lines changed: 44 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,33 @@ from Gazelle-generated package_info targets referenced via the package_metadata
55
common attribute (inherited from REPO.bazel default_package_metadata).
66
77
Implementation based on bazel-contrib/supply-chain gather_metadata pattern.
8+
Currently doesn't use the supply chain tools dep for this as it is not yet
9+
stable and we still need to support WORKSPACE which the supply-chain tools
10+
doesn't have support for.
811
"""
912

1013
load(
1114
"//go/private:providers.bzl",
1215
"GoArchive",
1316
)
1417

18+
visibility(["//go/private/..."])
19+
1520
BuildInfoMetadata = provider(
16-
doc = "Provides dependency version metadata for buildInfo",
21+
doc = "INTERNAL: Provides dependency version metadata for buildInfo. Do not depend on this provider.",
1722
fields = {
18-
"version_map": "Depset of (importpath, version) tuples",
23+
"importpaths": "Depset of importpath strings from Go dependencies",
24+
"metadata_providers": "Depset of PackageInfo providers with version data",
1925
},
2026
)
2127

2228
def _buildinfo_aspect_impl(target, ctx):
2329
"""Collects package_info metadata from dependencies.
2430
25-
Following bazel-contrib/supply-chain gather_metadata pattern, this checks:
26-
1. package_metadata common attribute (from REPO.bazel default_package_metadata)
27-
2. applicable_licenses attribute (fallback for older configs)
28-
3. Direct package_info providers on the target itself
31+
This aspect collects version information from package_metadata attributes
32+
(set via REPO.bazel default_package_metadata in go_repository) and tracks
33+
importpaths from Go dependencies. The actual version matching is deferred
34+
to execution time to avoid quadratic memory usage.
2935
3036
Args:
3137
target: The target being visited
@@ -34,69 +40,59 @@ def _buildinfo_aspect_impl(target, ctx):
3440
Returns:
3541
List containing BuildInfoMetadata provider
3642
"""
37-
direct_versions = []
38-
39-
# Approach 1: Check package_metadata common attribute (Bazel 5.4+)
43+
direct_importpaths = []
44+
direct_metadata = []
45+
transitive_importpaths = []
46+
transitive_metadata = []
47+
48+
# Collect importpath from this target if it's a Go target
49+
if GoArchive in target:
50+
importpath = target[GoArchive].data.importpath
51+
if importpath:
52+
direct_importpaths.append(importpath)
53+
54+
# Check package_metadata common attribute (Bazel 6+)
4055
# This is set via REPO.bazel default_package_metadata in go_repository
41-
package_metadata = []
4256
if hasattr(ctx.rule.attr, "package_metadata"):
4357
attr_value = ctx.rule.attr.package_metadata
4458
if attr_value:
45-
package_metadata = attr_value if type(attr_value) == "list" else [attr_value]
46-
47-
# Approach 2: Check applicable_licenses (legacy compatibility)
48-
if not package_metadata and hasattr(ctx.rule.attr, "applicable_licenses"):
49-
attr_value = ctx.rule.attr.applicable_licenses
50-
if attr_value:
51-
package_metadata = attr_value if type(attr_value) == "list" else [attr_value]
52-
53-
# Collect metadata from transitive dependencies (supply-chain pattern)
54-
transitive_depsets = []
55-
56-
# Traverse all attributes (supply-chain uses attr_aspects = ["*"])
57-
attrs = [attr for attr in dir(ctx.rule.attr)]
58-
for attr_name in attrs:
59-
# Skip private attributes
60-
if attr_name.startswith("_"):
59+
package_metadata = attr_value if type(attr_value) == type([]) else [attr_value]
60+
# Store the metadata targets directly for later processing
61+
direct_metadata.extend(package_metadata)
62+
63+
# Collect transitive metadata from Go dependencies only
64+
# Only traverse deps and embed to avoid non-Go dependencies
65+
for attr_name in ["deps", "embed"]:
66+
if not hasattr(ctx.rule.attr, attr_name):
6167
continue
6268

6369
attr_value = getattr(ctx.rule.attr, attr_name)
6470
if not attr_value:
6571
continue
6672

67-
# Handle both lists and single values
68-
deps = attr_value if type(attr_value) == "list" else [attr_value]
69-
73+
deps = attr_value if type(attr_value) == type([]) else [attr_value]
7074
for dep in deps:
71-
# Only process Target types
72-
if type(dep) != "Target":
73-
continue
74-
7575
# Collect transitive BuildInfoMetadata
7676
if BuildInfoMetadata in dep:
77-
transitive_depsets.append(dep[BuildInfoMetadata].version_map)
78-
79-
# Return using supply-chain memory optimization pattern
80-
if not direct_versions and not transitive_depsets:
81-
# No metadata at all, return empty provider
82-
return [BuildInfoMetadata(version_map = depset([]))]
77+
transitive_importpaths.append(dep[BuildInfoMetadata].importpaths)
78+
transitive_metadata.append(dep[BuildInfoMetadata].metadata_providers)
8379

84-
if not direct_versions and len(transitive_depsets) == 1:
85-
# Only one transitive depset, pass it up directly to save memory
86-
return [BuildInfoMetadata(version_map = transitive_depsets[0])]
87-
88-
# Combine direct and transitive metadata
80+
# Build depsets (empty depsets are efficient, no need for early return)
8981
return [BuildInfoMetadata(
90-
version_map = depset(
91-
direct = direct_versions,
92-
transitive = transitive_depsets,
82+
importpaths = depset(
83+
direct = direct_importpaths,
84+
transitive = transitive_importpaths,
85+
),
86+
metadata_providers = depset(
87+
direct = direct_metadata,
88+
transitive = transitive_metadata,
9389
),
9490
)]
9591

9692
buildinfo_aspect = aspect(
9793
doc = "Collects package_info metadata for Go buildInfo",
9894
implementation = _buildinfo_aspect_impl,
99-
attr_aspects = ["*"], # Traverse all attributes (supply-chain pattern)
95+
attr_aspects = ["deps", "embed"], # Only traverse Go dependencies
10096
provides = [BuildInfoMetadata],
10197
# Apply to generated targets including package_info
10298
apply_to_generating_rules = True,

go/private/rules/binary.bzl

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -162,36 +162,22 @@ def _go_binary_impl(ctx):
162162
)
163163

164164
# Collect version metadata from aspect for buildInfo
165-
version_map_file = None
166-
version_tuples = []
167-
168-
# Collect from embed attribute if present
169-
if hasattr(ctx.attr, "embed"):
170-
for embed in ctx.attr.embed:
171-
if BuildInfoMetadata in embed:
172-
version_tuples.extend(embed[BuildInfoMetadata].version_map.to_list())
173-
174-
# Collect from deps attribute if present
175-
if hasattr(ctx.attr, "deps"):
176-
for dep in ctx.attr.deps:
177-
if BuildInfoMetadata in dep:
178-
version_tuples.extend(dep[BuildInfoMetadata].version_map.to_list())
179-
180-
# Generate version map file if we have versions
181-
if version_tuples:
182-
version_map_file = ctx.actions.declare_file(ctx.label.name + "_versions.txt")
183-
184-
# Sort and deduplicate
185-
version_dict = {}
186-
for importpath, version in version_tuples:
187-
if importpath not in version_dict:
188-
version_dict[importpath] = version
189-
190-
# Write tab-separated file
191-
lines = ["{}\t{}".format(imp, ver) for imp, ver in sorted(version_dict.items())]
192-
ctx.actions.write(
193-
output = version_map_file,
194-
content = "\n".join(lines) + "\n" if lines else "",
165+
# Merge ALL metadata from all deps and embed targets to ensure complete coverage
166+
all_importpaths = []
167+
all_metadata = []
168+
for attr_name in ["embed", "deps"]:
169+
if not hasattr(ctx.attr, attr_name):
170+
continue
171+
for target in getattr(ctx.attr, attr_name):
172+
if BuildInfoMetadata in target:
173+
all_importpaths.append(target[BuildInfoMetadata].importpaths)
174+
all_metadata.append(target[BuildInfoMetadata].metadata_providers)
175+
176+
buildinfo_metadata = None
177+
if all_importpaths:
178+
buildinfo_metadata = BuildInfoMetadata(
179+
importpaths = depset(transitive = all_importpaths),
180+
metadata_providers = depset(transitive = all_metadata),
195181
)
196182

197183
# Get Bazel target label for buildInfo
@@ -213,7 +199,7 @@ def _go_binary_impl(ctx):
213199
version_file = ctx.version_file,
214200
info_file = ctx.info_file,
215201
executable = executable,
216-
version_map = version_map_file,
202+
buildinfo_metadata = buildinfo_metadata,
217203
target_label = target_label,
218204
)
219205
validation_output = archive.data._validation_output

0 commit comments

Comments
 (0)