Skip to content

refactor: Improve GitHub App installation ID retrieval #2154

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 39 additions & 53 deletions pkg/provider/github/app/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ import (
"context"
"fmt"
"net/http"
"net/url"
"slices"
"strings"
"time"

"github.com/golang-jwt/jwt/v4"
gt "github.com/google/go-github/v71/github"
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1"
"github.com/openshift-pipelines/pipelines-as-code/pkg/params"
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider/github"
Expand Down Expand Up @@ -38,72 +40,59 @@ func NewInstallation(req *http.Request, run *params.Run, repo *v1alpha1.Reposito
}

// GetAndUpdateInstallationID retrieves and updates the installation ID for the GitHub App.
// It generates a JWT token, lists all installations, and matches repositories to their installation IDs.
// If a matching repository is found, it returns the enterprise host, token, and installation ID.
// It generates a JWT token, and directly fetches the installation for the
// repository.
func (ip *Install) GetAndUpdateInstallationID(ctx context.Context) (string, string, int64, error) {
var (
enterpriseHost, token string
installationID int64
)
logger := logging.FromContext(ctx)

// Generate a JWT token for authentication
jwtToken, err := ip.GenerateJWT(ctx)
if err != nil {
return "", "", 0, err
}

// Get owner and repo from the repository URL
repoURL, err := url.Parse(ip.repo.Spec.URL)
if err != nil {
return "", "", 0, fmt.Errorf("failed to parse repository URL: %w", err)
}
pathParts := strings.Split(strings.Trim(repoURL.Path, "/"), "/")
if len(pathParts) < 2 {
return "", "", 0, fmt.Errorf("invalid repository URL path: %s", repoURL.Path)
}
Comment on lines +59 to +62
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick

Should this be checking for longer paths as well? I know Gitlab supports sub-projects but I don't think Github would support /foo/bar/baz. It technically does resolve for example https://github.com/openshift-pipelines//pipelines-as-code with the two slashes, but since the Repository CR's URL semantically has to match the URL sent in the Github event payload I don't think that // will be valid in the RepositoryCR's URL in the first place.

Suggested change
pathParts := strings.Split(strings.Trim(repoURL.Path, "/"), "/")
if len(pathParts) < 2 {
return "", "", 0, fmt.Errorf("invalid repository URL path: %s", repoURL.Path)
}
pathParts := strings.Split(strings.Trim(repoURL.Path, "/"), "/")
if len(pathParts) != 2 {
return "", "", 0, fmt.Errorf("invalid repository URL path: expected '<organization>/<repository>', got: %s", repoURL.Path)
}

owner := pathParts[0]
repoName := pathParts[1]
Comment on lines +63 to +64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we check them for being empty? Or is it supposed to be always present?
if owner == "" || repoName == "" { return "", "", 0, fmt.Errorf("invalid repository URL: owner or repo name is empty") }

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not bad to be on the safe side, but tbh that's probably not a concern. It would require there to be // in the path which, while possibly accepted by github, is never going to match the events from Github and therefore isn't valid as a Respository CR.


apiURL := *ip.ghClient.APIURL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
apiURL := *ip.ghClient.APIURL
if ip.ghClient.APIURL == nil {
return "", "", 0, fmt.Errorf("github client APIURL is nil")
}
apiURL := *ip.ghClient.APIURL

enterpriseHost = ip.request.Header.Get("X-GitHub-Enterprise-Host")
enterpriseHost := ip.request.Header.Get("X-GitHub-Enterprise-Host")
if enterpriseHost != "" {
// NOTE: Hopefully this works even when the GHE URL is on another host than the API URL
apiURL = "https://" + enterpriseHost + "/api/v3"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
apiURL = "https://" + enterpriseHost + "/api/v3"
apiURL = fmt.Sprintf("https://%s/api/v3", strings.TrimSuffix(enterpriseHost, "/"))

}

logger := logging.FromContext(ctx)
opt := &gt.ListOptions{PerPage: ip.ghClient.PaginedNumber}
client, _, _ := github.MakeClient(ctx, apiURL, jwtToken)
installationData := []*gt.Installation{}

// List all installations
for {
installationSet, resp, err := client.Apps.ListInstallations(ctx, opt)
// Directly get the installation for the repository
installation, _, err := client.Apps.FindRepositoryInstallation(ctx, owner, repoName)
if err != nil {
// Fallback to finding organization installation if repository installation is not found
installation, _, err = client.Apps.FindOrganizationInstallation(ctx, owner)
if err != nil {
Comment on lines +73 to 78
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change needed:
The Installation could also be installed for the User as well, so we'll also need to check client.Apps.FindUserInstallation() as well. If only these methods all had the same signature we could just iterate through a list of the methods, alas 😖

return "", "", 0, err
return "", "", 0, fmt.Errorf("could not find repository or organization installation for %s/%s: %w", owner, repoName, err)
}
installationData = append(installationData, installationSet...)
if resp.NextPage == 0 {
break
}
opt.Page = resp.NextPage
}

// Iterate through each installation to find a matching repository
for i := range installationData {
if installationData[i].ID == nil {
return "", "", 0, fmt.Errorf("installation ID is nil")
}
if *installationData[i].ID != 0 {
token, err = ip.ghClient.GetAppToken(ctx, ip.run.Clients.Kube, enterpriseHost, *installationData[i].ID, ip.namespace)
// While looping on the list of installations, there could be cases where we can't
// obtain a token for installation. In a test I did for GitHub App with ~400
// installations, there were 3 failing consistently with:
// "could not refresh installation id XXX's token: received non 2xx response status "403 Forbidden".
// If there is a matching installation after the failure, we miss it. So instead of
// failing, we just log the error and continue. Token is "".
if err != nil {
logger.Warn(err)
continue
}
}
exist, err := ip.matchRepos(ctx)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

with ip.matchRepos no longer used in this file, we can move it to the test file as it's now used only there.

if err != nil {
return "", "", 0, err
}
if exist {
installationID = *installationData[i].ID
break
}
if installation.ID == nil {
return "", "", 0, fmt.Errorf("installation ID is nil")
}
Comment on lines +83 to 85
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit pick suggestion - in case a user is reading this, a little more verbosity here could help understand what this error means. I don't know when (if ever) the installation ID might be nil but I can only assume that it would indicate a bug in how we use the API or a bug in the github-go library

Suggested change
if installation.ID == nil {
return "", "", 0, fmt.Errorf("installation ID is nil")
}
if installation.ID == nil {
return "", "", 0, fmt.Errorf("Github App installation found but contained no ID. This is likely a bug")
}


installationID := *installation.ID
token, err := ip.ghClient.GetAppToken(ctx, ip.run.Clients.Kube, enterpriseHost, installationID, ip.namespace)
if err != nil {
logger.Warnf("Could not get a token for installation ID %d: %v", installationID, err)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be an event instead of just a log? Not sure when events are desirable instead of just logs

// Return with the installation ID even if token generation fails,
// as some operations might only need the ID.
return enterpriseHost, "", installationID, nil
}

return enterpriseHost, token, installationID, nil
}

Expand All @@ -116,11 +105,8 @@ func (ip *Install) matchRepos(ctx context.Context) (bool, error) {
return false, err
}
ip.repoList = append(ip.repoList, installationRepoList...)
for i := range installationRepoList {
// If URL matches with repo spec URL then we can break the loop
if installationRepoList[i] == ip.repo.Spec.URL {
return true, nil
}
if slices.Contains(installationRepoList, ip.repo.Spec.URL) {
return true, nil
}
return false, nil
}
Expand Down
11 changes: 9 additions & 2 deletions pkg/provider/github/app/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func Test_GetAndUpdateInstallationID(t *testing.T) {
badToken := "BADTOKEN"
badID := 666
missingID := 111
orgName := "org"

fakeghclient, mux, serverURL, teardown := ghtesthelper.SetupGH()
defer teardown()
Expand Down Expand Up @@ -214,7 +215,7 @@ func Test_GetAndUpdateInstallationID(t *testing.T) {
Name: "repo",
},
Spec: v1alpha1.RepositorySpec{
URL: "https://matched/by/incoming",
URL: fmt.Sprintf("https://matched/%s/incoming", orgName),
Incomings: &[]v1alpha1.Incoming{
{
Targets: []string{"main"},
Expand All @@ -235,6 +236,10 @@ func Test_GetAndUpdateInstallationID(t *testing.T) {
_, _ = fmt.Fprintf(w, `{"token": "%s"}`, wantToken)
})

mux.HandleFunc(fmt.Sprintf("/orgs/%s/installation", orgName), func(w http.ResponseWriter, _ *http.Request) {
_, _ = fmt.Fprintf(w, `{"id": %d}`, wantID)
})
Comment on lines +239 to +241
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mind adding a test case for the fallback(s) as well?


mux.HandleFunc(fmt.Sprintf("/app/installations/%d/access_tokens", badID), func(w http.ResponseWriter, r *http.Request) {
testMethod(t, r, "POST")
w.Header().Set("Authorization", "Bearer "+jwtToken)
Expand All @@ -247,7 +252,9 @@ func Test_GetAndUpdateInstallationID(t *testing.T) {
mux.HandleFunc("/installation/repositories", func(w http.ResponseWriter, _ *http.Request) {
w.Header().Set("Authorization", "Bearer 12345")
w.Header().Set("Accept", "application/vnd.github+json")
_, _ = fmt.Fprint(w, `{"total_count": 2,"repositories": [{"id":1,"html_url": "https://matched/by/incoming"},{"id":2,"html_url": "https://anotherrepo/that/would/failit"}]}`)
_, _ = fmt.Fprintf(w,
`{"total_count": 2,"repositories": [{"id":1,"html_url": "https://matched/%s/incoming"},{"id":2,"html_url": "https://anotherrepo/that/would/failit"}]}`,
orgName)
})
ip = NewInstallation(req, run, repo, gprovider, testNamespace.GetName())
_, token, installationID, err := ip.GetAndUpdateInstallationID(ctx)
Expand Down
Loading