Skip to content

Commit ca8f6e1

Browse files
authored
Fix: Bzl6 compatible README searching (#13)
Adjust the implementation of searching for a README file to avoid the bzl7 `path.is_dir` feature. Instead just scan a bunch of paths and test that they exist. This has the positive side-effect of making the search order platform independent. ### Changes are visible to end-users: no ### Test plan - Manual testing; please provide instructions so we can reproduce: Testing on Bazel 6.5.0 which had previously regressed ``` ❯ USE_BAZEL_VERSION=6.5.0 aspect build --enable_bzlmod //:report.json && cat ./bazel-bin/report.json Starting local Bazel server and connecting to it... INFO: Analyzed target //:report.json (6 packages loaded, 9 targets configured). INFO: Found 1 target... Target //:report.json up-to-date: bazel-bin/report.json INFO: Elapsed time: 3.190s, Critical Path: 0.02s INFO: 1 process: 1 internal. INFO: Build completed successfully, 1 total action { "tools_telemetry": { "arch": "aarch64", "bazel_version": "6.5.0", "bazelisk": true, "ci": false, "counter": null, "deps": { "aspect_tools_telemetry": "0.0.0", "simple-example": "0.0.0" }, "has_bazel_module": true, "has_bazel_prelude": false, "has_bazel_tool": false, "has_bazel_workspace": true, "id": "README.md", "org": null, "os": "mac os x", "runner": null, "shell": "/bin/zsh", "user": "c108c75db8fa0a8414347aa58cef219be7a41e08" } }% ``` Touching the WORKSPACE files was required to make this 6.5.0 exercise succeed, even with bzlmod.
1 parent dd3b3df commit ca8f6e1

File tree

4 files changed

+59
-26
lines changed

4 files changed

+59
-26
lines changed

WORKSPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This project uses bzlmod
2+
# The WORKSPACE file is retained only for legacy Bazels

collectors/fingerprinting.bzl

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,53 +9,82 @@ load(":utils.bzl", "hash")
99
def _repo_id(repository_ctx):
1010
"""Try to extract an aggregation ID from the repo context.
1111
12-
Ideally we want to use the first few (usually stable!) lines from a highly
13-
stable file such as the README. This will provide a consistent aggregation
14-
ID regardless of whether a project is checked out locally or remotely.
15-
16-
Note that the repo ID doesn't depend on the org name, since the org name
17-
cannot be determined on workstations but we do want to count CI vs
18-
workstation builds for a single repo consistently.
12+
This strategy scans for a README-like file in some known locations and known
13+
formats and hashes the first few lines if we can find one. The intuition
14+
here is that README files in general are highly stable and in common README
15+
structures the first few lines especially contain an extremely stable title
16+
and summary only.
17+
18+
As a fallback we go to the first few lines of the MODULE.bazel file. This is
19+
expected to be less stable than a README file generally because a
20+
MODULE.bazel could be a simple listing of dependencies with nothing else. In
21+
practice the first several lines are likely a comment or a `module()`
22+
invocation which will be highly stable.
23+
24+
We consider other possible sources of stable identifiers such as a version
25+
control remote URL out of bounds because they may contain secrets and
26+
because accessing them without invoking commands is challenging.
1927
2028
"""
2129

2230
readme_file = None
23-
for suffix in [
31+
32+
for prefix in [
2433
"",
25-
# Github allows the README to be squirreled away, so we may need to
26-
# check subdirs. Assume that GitLab et al. allow the same.
2734
"doc",
2835
"docs",
29-
".github",
30-
".gitlab",
31-
".gitea",
32-
".forgejo",
36+
"Doc",
37+
"Docs",
3338
]:
34-
dir = repository_ctx.workspace_root
35-
if suffix:
36-
dir = paths.join(str(dir), suffix)
37-
dir = repository_ctx.path(dir)
38-
if dir.exists and dir.is_dir:
39-
for entry in dir.readdir():
40-
if entry.basename.lower().find("readme") != -1:
41-
readme_file = entry
39+
for base in [
40+
"README",
41+
"readme",
42+
"Readme",
43+
"index",
44+
]:
45+
# Extensions to try, listed in alphabetical order
46+
for ext in [
47+
"",
48+
".adoc",
49+
".asc",
50+
".asciidoc",
51+
".markdown",
52+
".md",
53+
".mdown",
54+
".mkdk",
55+
".org",
56+
".rdoc",
57+
".rst",
58+
".textile",
59+
".txt",
60+
".wiki",
61+
]:
62+
dir = repository_ctx.workspace_root
63+
if prefix:
64+
dir = paths.join(str(dir), prefix)
65+
file = repository_ctx.path(paths.join(str(dir), base + ext))
66+
if file.exists:
67+
readme_file = file
4268
break
4369

70+
if readme_file:
71+
break
72+
4473
if readme_file:
4574
break
4675

47-
# As a fallback use the top of the MODULE.bazel file
4876
if not readme_file:
4977
readme_file = repository_ctx.path(paths.join(str(repository_ctx.workspace_root), "MODULE.bazel"))
5078

51-
return hash(repository_ctx, "\n".join(repository_ctx.read(readme_file).split("\n")[:4]))
79+
content = "\n".join(repository_ctx.read(readme_file).split("\n")[:4])
80+
return hash(repository_ctx, content)
5281

5382

5483
def _repo_user(repository_ctx):
5584
"""Try to extract a fingerprint for the user who initiated the build.
5685
5786
Note that we salt the user IDs with the identified project ID to prevent
58-
correllation.
87+
correlation of user behavior across projects.
5988
6089
"""
6190

examples/simple/MODULE.bazel.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/simple/WORKSPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This project uses bzlmod
2+
# The WORKSPACE file is retained only for legacy Bazels

0 commit comments

Comments
 (0)