Skip to content

Commit 6cb55d2

Browse files
lqiu96JoeWang1127
andauthored
fix: Conventional Commit parser filters files based on Librarian flow (#2433)
Fixes: #2403 Generation PR is based on files changed in the source repo (googleapis) and NOT language repo. This should filter for files changed that match the the paths for each API in a library. Release PR is based on files changed in the language repo and NOT source repo. This should filter for files changed that match the the paths for a library's `source_root`. ## Changes - Introduce a parameter for generation / release flows to define their own files filtering logic (apiPaths vs sourceRoot) --------- Signed-off-by: Lawrence Qiu <[email protected]> Co-authored-by: Joe Wang <[email protected]>
1 parent 5b010b9 commit 6cb55d2

File tree

4 files changed

+255
-37
lines changed

4 files changed

+255
-37
lines changed

internal/librarian/commit_version_analyzer.go

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -28,20 +28,39 @@ import (
2828

2929
const defaultTagFormat = "{id}-{version}"
3030

31-
// GetConventionalCommitsSinceLastRelease returns all conventional commits for the given library since the
32-
// version specified in the state file.
33-
func GetConventionalCommitsSinceLastRelease(repo gitrepo.Repository, library *config.LibraryState) ([]*conventionalcommits.ConventionalCommit, error) {
31+
// getConventionalCommitsSinceLastRelease returns all conventional commits for the given library since the
32+
// version specified in the state file. The repo should be the language repo.
33+
func getConventionalCommitsSinceLastRelease(repo gitrepo.Repository, library *config.LibraryState) ([]*conventionalcommits.ConventionalCommit, error) {
3434
tag := formatTag(library.TagFormat, library.ID, library.Version)
3535
commits, err := repo.GetCommitsForPathsSinceTag(library.SourceRoots, tag)
3636
if err != nil {
3737
return nil, fmt.Errorf("failed to get commits for library %s: %w", library.ID, err)
3838
}
3939

40-
return convertToConventionalCommits(repo, library, commits)
40+
// checks that if the files in the commit are in the sources root. The release
41+
// changes are in the language repo and NOT in the source repo.
42+
shouldIncludeFiles := func(files []string) bool {
43+
return shouldIncludeForRelease(files, library.SourceRoots, library.ReleaseExcludePaths)
44+
}
45+
46+
return convertToConventionalCommits(repo, library, commits, shouldIncludeFiles)
47+
}
48+
49+
// shouldIncludeForRelease determines if a commit should be included in a release.
50+
// It returns true if there is at least one file in the commit that is under a source_root
51+
// and not under a release_exclude_path.
52+
func shouldIncludeForRelease(files, sourceRoots, excludePaths []string) bool {
53+
for _, file := range files {
54+
if isUnderAnyPath(file, sourceRoots) && !isUnderAnyPath(file, excludePaths) {
55+
return true
56+
}
57+
}
58+
return false
4159
}
4260

4361
// getConventionalCommitsSinceLastGeneration returns all conventional commits for
44-
// all API paths in given library since the last generation.
62+
// all API paths in given library since the last generation. The repo input should
63+
// be the googleapis source repo.
4564
func getConventionalCommitsSinceLastGeneration(repo gitrepo.Repository, library *config.LibraryState, lastGenCommit string) ([]*conventionalcommits.ConventionalCommit, error) {
4665
if lastGenCommit == "" {
4766
slog.Info("the last generation commit is empty, skip fetching conventional commits", "library", library.ID)
@@ -58,17 +77,38 @@ func getConventionalCommitsSinceLastGeneration(repo gitrepo.Repository, library
5877
return nil, fmt.Errorf("failed to get commits for library %s at commit %s: %w", library.ID, lastGenCommit, err)
5978
}
6079

61-
return convertToConventionalCommits(repo, library, commits)
80+
// checks that the files in the commit are in the api paths for the source repo.
81+
// The generation change is for changes in the source repo and NOT the language repo.
82+
shouldIncludeFiles := func(files []string) bool {
83+
return shouldIncludeForGeneration(files, apiPaths)
84+
}
85+
86+
return convertToConventionalCommits(repo, library, commits, shouldIncludeFiles)
6287
}
6388

64-
func convertToConventionalCommits(repo gitrepo.Repository, library *config.LibraryState, commits []*gitrepo.Commit) ([]*conventionalcommits.ConventionalCommit, error) {
89+
// shouldIncludeForGeneration determines if a commit should be included in generation.
90+
// It returns true if there is at least one file in the commit that is under the
91+
// library's API(s) path (a library could have multiple APIs).
92+
func shouldIncludeForGeneration(files, apiPaths []string) bool {
93+
for _, file := range files {
94+
if isUnderAnyPath(file, apiPaths) {
95+
return true
96+
}
97+
}
98+
return false
99+
}
100+
101+
// convertToConventionalCommits converts a list of commits in a git repo into a list
102+
// of conventional commits. The filesFilter parameter is custom filter out non-matching
103+
// files depending on a generation or a release change.
104+
func convertToConventionalCommits(repo gitrepo.Repository, library *config.LibraryState, commits []*gitrepo.Commit, filesFilter func(files []string) bool) ([]*conventionalcommits.ConventionalCommit, error) {
65105
var conventionalCommits []*conventionalcommits.ConventionalCommit
66106
for _, commit := range commits {
67107
files, err := repo.ChangedFilesInCommit(commit.Hash.String())
68108
if err != nil {
69109
return nil, fmt.Errorf("failed to get changed files for commit %s: %w", commit.Hash.String(), err)
70110
}
71-
if !shouldInclude(files, library.SourceRoots, library.ReleaseExcludePaths) {
111+
if !filesFilter(files) {
72112
continue
73113
}
74114
parsedCommits, err := conventionalcommits.ParseCommits(commit, library.ID)
@@ -97,18 +137,6 @@ func isUnderAnyPath(file string, paths []string) bool {
97137
return false
98138
}
99139

100-
// shouldInclude determines if a commit should be included in a release.
101-
// It returns true if there is at least one file in the commit that is under a source_root
102-
// and not under a release_exclude_path.
103-
func shouldInclude(files, sourceRoots, excludePaths []string) bool {
104-
for _, file := range files {
105-
if isUnderAnyPath(file, sourceRoots) && !isUnderAnyPath(file, excludePaths) {
106-
return true
107-
}
108-
}
109-
return false
110-
}
111-
112140
// formatTag returns the git tag for a given library version.
113141
func formatTag(tagFormat string, libraryID string, version string) string {
114142
if tagFormat == "" {

internal/librarian/commit_version_analyzer_test.go

Lines changed: 171 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
"github.com/googleapis/librarian/internal/semver"
2828
)
2929

30-
func TestShouldInclude(t *testing.T) {
30+
func TestShouldIncludeForRelease(t *testing.T) {
3131
t.Parallel()
3232
for _, test := range []struct {
3333
name string
@@ -135,9 +135,45 @@ func TestShouldInclude(t *testing.T) {
135135
} {
136136
t.Run(test.name, func(t *testing.T) {
137137
t.Parallel()
138-
got := shouldInclude(test.files, test.sourceRoots, test.excludePaths)
138+
got := shouldIncludeForRelease(test.files, test.sourceRoots, test.excludePaths)
139139
if got != test.want {
140-
t.Errorf("shouldInclude(%v, %v, %v) = %v, want %v", test.files, test.sourceRoots, test.excludePaths, got, test.want)
140+
t.Errorf("shouldIncludeForRelease(%v, %v, %v) = %v, want %v", test.files, test.sourceRoots, test.excludePaths, got, test.want)
141+
}
142+
})
143+
}
144+
}
145+
146+
func TestShouldIncludeForGeneration(t *testing.T) {
147+
t.Parallel()
148+
for _, test := range []struct {
149+
name string
150+
files []string
151+
apiPaths []string
152+
want bool
153+
}{
154+
{
155+
name: "all_files_in_apiPaths",
156+
files: []string{"a/b/c.proto"},
157+
apiPaths: []string{"a"},
158+
want: true,
159+
},
160+
{
161+
name: "some_files_in_apiPaths",
162+
files: []string{"a/b/c.proto", "e/f/g.proto"},
163+
apiPaths: []string{"a"},
164+
want: true,
165+
},
166+
{
167+
name: "no_files_in_apiPaths",
168+
files: []string{"a/b/c.proto"},
169+
apiPaths: []string{"b"},
170+
want: false,
171+
},
172+
} {
173+
t.Run(test.name, func(t *testing.T) {
174+
got := shouldIncludeForGeneration(test.files, test.apiPaths)
175+
if got != test.want {
176+
t.Errorf("shouldIncludeForGeneration(%v, %v) = %v, want %v", test.files, test.apiPaths, got, test.want)
141177
}
142178
})
143179
}
@@ -220,7 +256,7 @@ func TestGetConventionalCommitsSinceLastRelease(t *testing.T) {
220256
wantErrPhrase string
221257
}{
222258
{
223-
name: "get_commits_for_foo",
259+
name: "found_matching_commits_for_foo",
224260
repo: repoWithCommits,
225261
library: &config.LibraryState{
226262
ID: "foo",
@@ -246,6 +282,34 @@ func TestGetConventionalCommitsSinceLastRelease(t *testing.T) {
246282
},
247283
},
248284
},
285+
{
286+
name: "no_matching_commits_for_foo",
287+
repo: repoWithCommits,
288+
library: &config.LibraryState{
289+
ID: "foo",
290+
Version: "1.0.0",
291+
TagFormat: "{id}-v{version}",
292+
SourceRoots: []string{"no_matching_dir"},
293+
},
294+
},
295+
{
296+
name: "apiPaths_has_no_impact_on_release",
297+
repo: repoWithCommits,
298+
library: &config.LibraryState{
299+
ID: "foo",
300+
Version: "1.0.0",
301+
TagFormat: "{id}-v{version}",
302+
SourceRoots: []string{"no_matching_dir"}, // For release, only this is considered
303+
APIs: []*config.API{
304+
{
305+
Path: "foo",
306+
},
307+
{
308+
Path: "bar",
309+
},
310+
},
311+
},
312+
},
249313
{
250314
name: "GetCommitsForPathsSinceTag error",
251315
repo: &MockRepository{
@@ -281,7 +345,7 @@ func TestGetConventionalCommitsSinceLastRelease(t *testing.T) {
281345
},
282346
} {
283347
t.Run(test.name, func(t *testing.T) {
284-
got, err := GetConventionalCommitsSinceLastRelease(test.repo, test.library)
348+
got, err := getConventionalCommitsSinceLastRelease(test.repo, test.library)
285349
if test.wantErr {
286350
if err == nil {
287351
t.Fatal("GetConventionalCommitsSinceLastRelease() should have failed")
@@ -301,6 +365,108 @@ func TestGetConventionalCommitsSinceLastRelease(t *testing.T) {
301365
}
302366
}
303367

368+
func TestGetConventionalCommitsSinceLastGeneration(t *testing.T) {
369+
t.Parallel()
370+
for _, test := range []struct {
371+
name string
372+
repo gitrepo.Repository
373+
library *config.LibraryState
374+
want []*conventionalcommits.ConventionalCommit
375+
wantErr bool
376+
wantErrPhrase string
377+
}{
378+
{
379+
name: "found_matching_file_changes_for_foo",
380+
library: &config.LibraryState{
381+
ID: "foo",
382+
APIs: []*config.API{
383+
{
384+
Path: "foo",
385+
},
386+
},
387+
},
388+
repo: &MockRepository{
389+
GetCommitsForPathsSinceLastGenByCommit: map[string][]*gitrepo.Commit{
390+
"1234": {
391+
{Message: "feat(foo): a feature"},
392+
},
393+
},
394+
ChangedFilesInCommitValue: []string{"foo/a.proto"},
395+
},
396+
want: []*conventionalcommits.ConventionalCommit{
397+
{
398+
Type: "feat",
399+
Scope: "foo",
400+
Subject: "a feature",
401+
LibraryID: "foo",
402+
Footers: map[string]string{},
403+
},
404+
},
405+
},
406+
{
407+
name: "no_matching_file_changes_for_foo",
408+
library: &config.LibraryState{
409+
ID: "foo",
410+
APIs: []*config.API{
411+
{
412+
Path: "foo",
413+
},
414+
},
415+
},
416+
repo: &MockRepository{
417+
GetCommitsForPathsSinceLastGenByCommit: map[string][]*gitrepo.Commit{
418+
"1234": {
419+
{Message: "feat(baz): a feature"},
420+
},
421+
},
422+
ChangedFilesInCommitValue: []string{"baz/a.proto", "baz/b.proto", "bar/a.proto"}, // file changed is not in foo/*
423+
},
424+
},
425+
{
426+
name: "sources_root_has_no_impact",
427+
library: &config.LibraryState{
428+
ID: "foo",
429+
APIs: []*config.API{
430+
{
431+
Path: "foo", // For generation, only this is considered
432+
},
433+
},
434+
SourceRoots: []string{
435+
"baz/",
436+
"bar/",
437+
},
438+
},
439+
repo: &MockRepository{
440+
GetCommitsForPathsSinceLastGenByCommit: map[string][]*gitrepo.Commit{
441+
"1234": {
442+
{Message: "feat(baz): a feature"},
443+
},
444+
},
445+
ChangedFilesInCommitValue: []string{"baz/a.proto", "baz/b.proto", "bar/a.proto"}, // file changed is not in foo/*
446+
},
447+
},
448+
} {
449+
t.Run(test.name, func(t *testing.T) {
450+
got, err := getConventionalCommitsSinceLastGeneration(test.repo, test.library, "1234")
451+
if test.wantErr {
452+
if err == nil {
453+
t.Fatal("getConventionalCommitsSinceLastGeneration() should have failed")
454+
}
455+
if !strings.Contains(err.Error(), test.wantErrPhrase) {
456+
t.Errorf("GetConventionalCommitsSinceLastRelease() returned error %q, want to contain %q", err.Error(), test.wantErrPhrase)
457+
}
458+
return
459+
}
460+
if err != nil {
461+
t.Fatalf("GetConventionalCommitsSinceLastRelease() failed: %v", err)
462+
}
463+
if diff := cmp.Diff(test.want, got, cmpopts.IgnoreFields(conventionalcommits.ConventionalCommit{}, "SHA", "CommitHash", "Body", "IsBreaking", "When")); diff != "" {
464+
t.Errorf("GetConventionalCommitsSinceLastRelease() mismatch (-want +got):\n%s", diff)
465+
}
466+
})
467+
}
468+
}
469+
304470
func TestGetHighestChange(t *testing.T) {
305471
t.Parallel()
306472
for _, test := range []struct {

internal/librarian/release_init.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ func (r *initRunner) runInitCommand(ctx context.Context, outputDir string) error
196196
// processLibrary wrapper to process the library for release. Helps retrieve latest commits
197197
// since the last release and passing the changes to updateLibrary.
198198
func (r *initRunner) processLibrary(library *config.LibraryState) error {
199-
commits, err := GetConventionalCommitsSinceLastRelease(r.repo, library)
199+
commits, err := getConventionalCommitsSinceLastRelease(r.repo, library)
200200
if err != nil {
201201
return fmt.Errorf("failed to fetch conventional commits for library, %s: %w", library.ID, err)
202202
}

0 commit comments

Comments
 (0)