diff --git a/docs/content/docs/install/logging.md b/docs/content/docs/install/logging.md new file mode 100644 index 000000000..634aeefed --- /dev/null +++ b/docs/content/docs/install/logging.md @@ -0,0 +1,102 @@ +--- +title: Logging +weight: 4 +--- + +## Logging Configuration + +Pipelines-as-Code uses a ConfigMap named `pac-config-logging` in its namespace (by default, `pipelines-as-code`) to configure the logging behavior of its controllers. + +To view the ConfigMap, use the following command: + +```bash +kubectl get configmap pac-config-logging -n pipelines-as-code +``` + +To see the full content of the ConfigMap, run: + +```bash +kubectl get configmap pac-config-logging -n pipelines-as-code -o yaml +``` + +The `data` section of the ConfigMap contains the following keys: + +* `loglevel.pipelinesascode`: The log level for the `pipelines-as-code-controller` component. +* `loglevel.pipelines-as-code-webhook`: The log level for the `pipelines-as-code-webhook` component. +* `loglevel.pac-watcher`: The log level for the `pipelines-as-code-watcher` component. + +You can change the log level from `info` to `debug` or any other supported value. For example, to set the log level for the `pipelines-as-code-watcher` to `debug`, run: + +```bash +kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[{"op": "replace", "path": "/data/loglevel.pac-watcher", "value":"debug"}]' +``` + +The watcher will automatically pick up the new log level. + +If you want to use the same log level for all Pipelines-as-Code components, you can remove the individual `loglevel.*` keys. In this case, all components will use the log level defined in the `level` field of the `zap-logger-config`. + +```bash +kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[{"op": "remove", "path": "/data/loglevel.pac-watcher"}, {"op": "remove", "path": "/data/loglevel.pipelines-as-code-webhook"}, {"op": "remove", "path": "/data/loglevel.pipelinesascode"}]' +``` + +The `zap-logger-config` supports the following log levels: + +* `debug`: Fine-grained debugging information. +* `info`: Normal operational logging. +* `warn`: Unexpected but non-critical errors. +* `error`: Critical errors that are unexpected during normal operation. +* `dpanic`: Triggers a panic (crash) in development mode. +* `panic`: Triggers a panic (crash). +* `fatal`: Immediately exits with a status of 1. + +For more details, see the [Knative logging documentation](https://knative.dev/docs/serving/observability/logging/config-logging). + +## Debugging API Interactions + +If you need to troubleshoot interactions with the Git provider API (e.g., GitHub), you can enable detailed API request logging. This is useful for debugging permission issues or unexpected API responses. + +To enable this feature, set the log level for the `pipelines-as-code-controller` to `debug`. This will cause the controller to log the duration, URL, and remaining rate-limit for each API call it makes. + +You can enable this for the main controller with the following `kubectl` command: + +```bash +kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[{"op": "replace", "path": "/data/loglevel.pipelinesascode", "value":"debug"}]' +``` + +### Rate Limit Monitoring + +When debug logging is enabled, Pipelines-as-Code automatically monitors GitHub API rate limits and provides intelligent warnings when limits are running low. This helps prevent API quota exhaustion and provides early warning for potential service disruptions. + +The rate limit monitoring includes: + +* **Debug level**: All API calls log their duration, URL, and remaining rate limit count +* **Info level**: When remaining calls drop below 500, logs include additional context like total limit and reset time +* **Warning level**: When remaining calls drop below 100, warnings are logged to alert administrators +* **Error level**: When remaining calls drop below 50, critical alerts are logged indicating immediate attention is needed + +#### Example Log Messages + +```console +# Debug level - normal API call logging +{"level":"debug","ts":"...","caller":"...","msg":"GitHub API call completed","operation":"get_pull_request","duration_ms":23,"provider":"github","repo":"myorg/myrepo","url_path":"/repos/myorg/myrepo/pulls/1","rate_limit_remaining":"4850","status_code":200} +# Info level - moderate rate limit usage +{"level":"info","ts":"...","caller":"...","msg":"GitHub API rate limit moderate","repo":"myorg/myrepo","remaining":350,"limit":"5000","reset":"1672531200 (15:30:00 UTC)"} +# Warning level - low rate limit +{"level":"warn","ts":"...","caller":"...","msg":"GitHub API rate limit running low","repo":"myorg/myrepo","remaining":75,"limit":"5000","reset":"1672531200 (15:30:00 UTC)"} +# Error level - critically low rate limit +{"level":"error","ts":"...","caller":"...","msg":"GitHub API rate limit critically low","repo":"myorg/myrepo","remaining":25,"limit":"5000","reset":"1672531200 (15:30:00 UTC)"} +``` + +The rate limit information includes: + +* **Remaining calls**: Number of API calls left in the current rate limit window +* **Total limit**: The maximum number of calls allowed (typically 5000 for authenticated requests) +* **Reset time**: When the rate limit window resets (shown as Unix timestamp and human-readable time) +* **Repository context**: Which repository triggered the API call (when available) + +This monitoring helps administrators: + +* **Prevent service disruptions**: Early warnings allow proactive measures before hitting rate limits +* **Optimize API usage**: Identify repositories or operations that consume excessive API calls +* **Plan maintenance windows**: Schedule intensive operations around rate limit reset times +* **Debug authentication issues**: Rate limit headers can indicate token validity and permissions diff --git a/docs/content/docs/install/settings.md b/docs/content/docs/install/settings.md index 57f35d42d..7d19bf96d 100644 --- a/docs/content/docs/install/settings.md +++ b/docs/content/docs/install/settings.md @@ -416,91 +416,3 @@ A few settings are available to configure this feature: The provider set to `GitHub App` by tkn pac bootstrap, used to detect if a GitHub App is already configured when a user runs the bootstrap command a second time or the `webhook add` command. - -## Logging Configuration - - Pipelines-as-Code uses the ConfigMap named `pac-config-logging` in the same namespace (`pipelines-as-code` by default) as the controllers. To get the ConfigMap use the following command: - - ```bash - $ kubectl get configmap pac-config-logging -n pipelines-as-code - - NAME DATA AGE - pac-config-logging 4 9m44s - ``` - - To retrieve the content of the ConfigMap: - - ```bash - $ kubectl get configmap pac-config-logging -n pipelines-as-code -o yaml - - apiVersion: v1 - kind: ConfigMap - metadata: - labels: - app.kubernetes.io/instance: default - app.kubernetes.io/part-of: pipelines-as-code - name: pac-config-logging - namespace: pipelines-as-code - data: - loglevel.pac-watcher: info - loglevel.pipelines-as-code-webhook: info - loglevel.pipelinesascode: info - zap-logger-config: | - { - "level": "info", - "development": false, - "sampling": { - "initial": 100, - "thereafter": 100 - }, - "outputPaths": ["stdout"], - "errorOutputPaths": ["stderr"], - "encoding": "json", - "encoderConfig": { - "timeKey": "ts", - "levelKey": "level", - "nameKey": "logger", - "callerKey": "caller", - "messageKey": "msg", - "stacktraceKey": "stacktrace", - "lineEnding": "", - "levelEncoder": "", - "timeEncoder": "iso8601", - "durationEncoder": "", - "callerEncoder": "" - } - } - ``` - -The `loglevel.*` fields define the log level for the controllers: - -* loglevel.pipelinesascode - the log level for the pipelines-as-code-controller component -* loglevel.pipelines-as-code-webhook - the log level for the pipelines-as-code-webhook component -* loglevel.pac-watcher - the log level for the pipelines-as-code-watcher component - -You can change the log level from `info` to `debug` or any other supported values. For example, select the `debug` log level for the pipelines-as-code-watcher component: - -```bash -kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[{"op": "replace", "path": "/data/loglevel.pac-watcher", "value":"debug"}]' -``` - -After this command, the controller gets a new log level value. -If you want to use the same log level for all Pipelines-as-Code components, delete `level.*` values from configmap: - -```bash -kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[ {"op": "remove", "path": "/data/loglevel.pac-watcher"}, {"op": "remove", "path": "/data/loglevel.pipelines-as-code-webhook"}, {"op": "remove", "path": "/data/loglevel.pipelinesascode"}]' -``` - -In this case, all Pipelines-as-Code components get a common log level from `zap-logger-config` - `level` field from the json. - -`zap-logger-config` supports the following log levels: - -* debug - fine-grained debugging -* info - normal logging -* warn - unexpected but non-critical errors -* error - critical errors; unexpected during normal operation -* dpanic - in debug mode, trigger a panic (crash) -* panic - trigger a panic (crash) -* fatal - immediately exit with exit status 1 (failure) - -See more: diff --git a/hack/dev/kind/install.sh b/hack/dev/kind/install.sh index 4e310ef71..d978ea4ba 100755 --- a/hack/dev/kind/install.sh +++ b/hack/dev/kind/install.sh @@ -197,6 +197,7 @@ function configure_pac() { kubectl patch configmap -n pipelines-as-code -p \ '{"data":{"catalog-1-type": "artifacthub", "catalog-1-id": "pachub", "catalog-1-name": "pipelines-as-code", "catalog-1-url": "https://artifacthub.io"}}' \ --type merge pipelines-as-code + kubectl patch configmap pac-config-logging -n pipelines-as-code --type json -p '[{"op": "replace", "path": "/data/loglevel.pipelinesascode", "value":"debug"}]' set +x if [[ -n ${PAC_PASS_SECRET_FOLDER} ]]; then echo "Installing PAC secrets" diff --git a/hack/gh-workflow-ci.sh b/hack/gh-workflow-ci.sh index b8ab2d297..6fd06152f 100755 --- a/hack/gh-workflow-ci.sh +++ b/hack/gh-workflow-ci.sh @@ -3,6 +3,8 @@ # Helper script for GitHub Actions CI, used from e2e tests. set -exufo pipefail +export PAC_API_INSTRUMENTATION_DIR=/tmp/api-instrumentation + create_pac_github_app_secret() { # Read from environment variables instead of arguments local app_private_key="${PAC_GITHUB_PRIVATE_KEY}" @@ -93,7 +95,7 @@ run_e2e_tests() { output_logs() { if command -v "snazy" >/dev/null 2>&1; then - snazy --skip-line-regexp="^(Reconcile (succeeded|error)|Updating webhook)" /tmp/logs/pac-pods.log + snazy --extra-fields --skip-line-regexp="^(Reconcile (succeeded|error)|Updating webhook)" /tmp/logs/pac-pods.log else # snazy for the poors python -c "import sys,json,datetime; [print(f'• { (lambda t: datetime.datetime.fromisoformat(t.rstrip(\"Z\")).strftime(\"%H:%M:%S\") if isinstance(t,str) else datetime.datetime.fromtimestamp(t).strftime(\"%H:%M:%S\"))(json.loads(l.strip())[\"ts\"] )} {json.loads(l.strip()).get(\"msg\",\"\")}') if l.strip().startswith('{') else print(l.strip()) for l in sys.stdin]" \ @@ -127,6 +129,11 @@ collect_logs() { kubectl -n ${ns} get events >/tmp/logs/ns/${ns}/events done + if [[ -d ${PAC_API_INSTRUMENTATION_DIR} && -n "$(ls -A ${PAC_API_INSTRUMENTATION_DIR})" ]]; then + echo "Copying API instrumentation logs from ${PAC_API_INSTRUMENTATION_DIR}" + cp -a ${PAC_API_INSTRUMENTATION_DIR} /tmp/logs/$(basename ${PAC_API_INSTRUMENTATION_DIR}) + fi + for url in "${test_gitea_smee_url}" "${github_ghe_smee_url}"; do find /tmp/logs -type f -exec grep -l "${url}" {} \; | xargs -r sed -i "s|${url}|SMEE_URL|g" done diff --git a/pkg/provider/github/acl.go b/pkg/provider/github/acl.go index e4c224cda..e6f144a75 100644 --- a/pkg/provider/github/acl.go +++ b/pkg/provider/github/acl.go @@ -20,7 +20,9 @@ func (v *Provider) CheckPolicyAllowing(ctx context.Context, event *info.Event, a // TODO: caching opt := github.ListOptions{PerPage: v.PaginedNumber} for { - members, resp, err := v.Client().Teams.ListTeamMembersBySlug(ctx, event.Organization, team, &github.TeamListTeamMembersOptions{ListOptions: opt}) + members, resp, err := wrapAPI(v, "list_team_members_by_slug", func() ([]*github.User, *github.Response, error) { + return v.Client().Teams.ListTeamMembersBySlug(ctx, event.Organization, team, &github.TeamListTeamMembersOptions{ListOptions: opt}) + }) if resp.StatusCode == http.StatusNotFound { // we explicitly disallow the policy when the team is not found // maybe we should ignore it instead? i'd rather keep this explicit @@ -169,7 +171,9 @@ func (v *Provider) aclAllowedOkToTestFromAnOwner(ctx context.Context, event *inf // aclAllowedOkToTestCurrentEvent only check if this is issue comment event // have /ok-to-test regex and sender is allowed. func (v *Provider) aclAllowedOkToTestCurrentComment(ctx context.Context, revent *info.Event, id int64) (bool, error) { - comment, _, err := v.Client().Issues.GetComment(ctx, revent.Organization, revent.Repository, id) + comment, _, err := wrapAPI(v, "get_issue_comment", func() (*github.IssueComment, *github.Response, error) { + return v.Client().Issues.GetComment(ctx, revent.Organization, revent.Repository, id) + }) if err != nil { return false, err } @@ -235,7 +239,9 @@ func (v *Provider) aclCheckAll(ctx context.Context, rev *info.Event) (bool, erro // // ex: dependabot, *[bot] etc... func (v *Provider) checkPullRequestForSameURL(ctx context.Context, runevent *info.Event) (bool, error) { - pr, resp, err := v.Client().PullRequests.Get(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber) + pr, resp, err := wrapAPI(v, "get_pull_request", func() (*github.PullRequest, *github.Response, error) { + return v.Client().PullRequests.Get(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber) + }) if err != nil { return false, err } @@ -258,7 +264,9 @@ func (v *Provider) checkSenderOrgMembership(ctx context.Context, runevent *info. } for { - users, resp, err := v.Client().Organizations.ListMembers(ctx, runevent.Organization, opt) + users, resp, err := wrapAPI(v, "list_org_members", func() ([]*github.User, *github.Response, error) { + return v.Client().Organizations.ListMembers(ctx, runevent.Organization, opt) + }) // If we are 404 it means we are checking a repo owner and not a org so let's bail out with grace if resp != nil && resp.StatusCode == http.StatusNotFound { return false, nil @@ -282,10 +290,12 @@ func (v *Provider) checkSenderOrgMembership(ctx context.Context, runevent *info. // checkSenderRepoMembership check if user is allowed to run CI. func (v *Provider) checkSenderRepoMembership(ctx context.Context, runevent *info.Event) (bool, error) { - isCollab, _, err := v.Client().Repositories.IsCollaborator(ctx, - runevent.Organization, - runevent.Repository, - runevent.Sender) + isCollab, _, err := wrapAPI(v, "is_collaborator", func() (bool, *github.Response, error) { + return v.Client().Repositories.IsCollaborator(ctx, + runevent.Organization, + runevent.Repository, + runevent.Sender) + }) return isCollab, err } @@ -313,8 +323,10 @@ func (v *Provider) GetStringPullRequestComment(ctx context.Context, runevent *in ListOptions: github.ListOptions{PerPage: v.PaginedNumber}, } for { - comments, resp, err := v.Client().Issues.ListComments(ctx, runevent.Organization, runevent.Repository, - prNumber, opt) + comments, resp, err := wrapAPI(v, "list_issue_comments", func() ([]*github.IssueComment, *github.Response, error) { + return v.Client().Issues.ListComments(ctx, runevent.Organization, runevent.Repository, + prNumber, opt) + }) if err != nil { return nil, err } diff --git a/pkg/provider/github/github.go b/pkg/provider/github/github.go index e48cd7346..19936ee7f 100644 --- a/pkg/provider/github/github.go +++ b/pkg/provider/github/github.go @@ -21,7 +21,6 @@ import ( "github.com/openshift-pipelines/pipelines-as-code/pkg/params/info" "github.com/openshift-pipelines/pipelines-as-code/pkg/params/triggertype" "github.com/openshift-pipelines/pipelines-as-code/pkg/provider" - providerMetrics "github.com/openshift-pipelines/pipelines-as-code/pkg/provider/metrics" "go.uber.org/zap" "golang.org/x/oauth2" "k8s.io/client-go/kubernetes" @@ -75,12 +74,6 @@ func New() *Provider { } func (v *Provider) Client() *github.Client { - providerMetrics.RecordAPIUsage( - v.Logger, - v.providerName, - v.triggerEvent, - v.repo, - ) return v.ghClient } @@ -257,7 +250,9 @@ func parseTS(headerTS string) (time.Time, error) { // but this gives a nice hint to the user into their namespace event of where // the issue was. func (v *Provider) checkWebhookSecretValidity(ctx context.Context, cw clockwork.Clock) error { - rl, resp, err := v.Client().RateLimit.Get(ctx) + rl, resp, err := wrapAPI(v, "check_rate_limit", func() (*github.RateLimits, *github.Response, error) { + return v.Client().RateLimit.Get(ctx) + }) if resp != nil && resp.StatusCode == http.StatusNotFound { v.Logger.Info("skipping checking if token has expired, rate_limit api is not enabled on token") return nil @@ -345,7 +340,9 @@ func (v *Provider) GetTektonDir(ctx context.Context, runevent *info.Event, path, v.Logger.Infof("Using PipelineRun definition from source %s %s on commit SHA %s", runevent.TriggerTarget.String(), prInfo, runevent.SHA) } - rootobjects, _, err := v.Client().Git.GetTree(ctx, runevent.Organization, runevent.Repository, revision, false) + rootobjects, _, err := wrapAPI(v, "get_root_tree", func() (*github.Tree, *github.Response, error) { + return v.Client().Git.GetTree(ctx, runevent.Organization, runevent.Repository, revision, false) + }) if err != nil { return "", err } @@ -367,8 +364,10 @@ func (v *Provider) GetTektonDir(ctx context.Context, runevent *info.Event, path, // there is a limit on this recursive calls to 500 entries, as documented here: // https://docs.github.com/en/rest/reference/git#get-a-tree // so we may need to address it in the future. - tektonDirObjects, _, err := v.Client().Git.GetTree(ctx, runevent.Organization, runevent.Repository, tektonDirSha, - true) + tektonDirObjects, _, err := wrapAPI(v, "get_tekton_tree", func() (*github.Tree, *github.Response, error) { + return v.Client().Git.GetTree(ctx, runevent.Organization, runevent.Repository, tektonDirSha, + true) + }) if err != nil { return "", err } @@ -388,14 +387,18 @@ func (v *Provider) GetCommitInfo(ctx context.Context, runevent *info.Event) erro var commit *github.Commit sha := runevent.SHA if runevent.SHA == "" && runevent.HeadBranch != "" { - branchinfo, _, err := v.Client().Repositories.GetBranch(ctx, runevent.Organization, runevent.Repository, runevent.HeadBranch, 1) + branchinfo, _, err := wrapAPI(v, "get_branch_info", func() (*github.Branch, *github.Response, error) { + return v.Client().Repositories.GetBranch(ctx, runevent.Organization, runevent.Repository, runevent.HeadBranch, 1) + }) if err != nil { return err } sha = branchinfo.Commit.GetSHA() } var err error - commit, _, err = v.Client().Git.GetCommit(ctx, runevent.Organization, runevent.Repository, sha) + commit, _, err = wrapAPI(v, "get_commit", func() (*github.Commit, *github.Response, error) { + return v.Client().Git.GetCommit(ctx, runevent.Organization, runevent.Repository, sha) + }) if err != nil { return err } @@ -418,8 +421,10 @@ func (v *Provider) GetFileInsideRepo(ctx context.Context, runevent *info.Event, ref = runevent.DefaultBranch } - fp, objects, _, err := v.Client().Repositories.GetContents(ctx, runevent.Organization, - runevent.Repository, path, &github.RepositoryContentGetOptions{Ref: ref}) + fp, objects, _, err := wrapAPIGetContents(v, "get_file_contents", func() (*github.RepositoryContent, []*github.RepositoryContent, *github.Response, error) { + return v.Client().Repositories.GetContents(ctx, runevent.Organization, + runevent.Repository, path, &github.RepositoryContentGetOptions{Ref: ref}) + }) if err != nil { return "", err } @@ -460,7 +465,9 @@ func (v *Provider) concatAllYamlFiles(ctx context.Context, objects []*github.Tre // getPullRequest get a pull request details. func (v *Provider) getPullRequest(ctx context.Context, runevent *info.Event) (*info.Event, error) { - pr, _, err := v.Client().PullRequests.Get(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber) + pr, _, err := wrapAPI(v, "get_pull_request", func() (*github.PullRequest, *github.Response, error) { + return v.Client().PullRequests.Get(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber) + }) if err != nil { return runevent, err } @@ -499,7 +506,9 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf opt := &github.ListOptions{PerPage: v.PaginedNumber} changedFiles := changedfiles.ChangedFiles{} for { - repoCommit, resp, err := v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt) + repoCommit, resp, err := wrapAPI(v, "list_pull_request_files", func() ([]*github.CommitFile, *github.Response, error) { + return v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt) + }) if err != nil { return changedfiles.ChangedFiles{}, err } @@ -528,7 +537,9 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf if runevent.TriggerTarget == "push" { changedFiles := changedfiles.ChangedFiles{} - rC, _, err := v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{}) + rC, _, err := wrapAPI(v, "get_commit_files", func() (*github.RepositoryCommit, *github.Response, error) { + return v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{}) + }) if err != nil { return changedfiles.ChangedFiles{}, err } @@ -554,7 +565,9 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf // getObject Get an object from a repository. func (v *Provider) getObject(ctx context.Context, sha string, runevent *info.Event) ([]byte, error) { - blob, _, err := v.Client().Git.GetBlob(ctx, runevent.Organization, runevent.Repository, sha) + blob, _, err := wrapAPI(v, "get_blob", func() (*github.Blob, *github.Response, error) { + return v.Client().Git.GetBlob(ctx, runevent.Organization, runevent.Repository, sha) + }) if err != nil { return nil, err } @@ -576,7 +589,9 @@ func ListRepos(ctx context.Context, v *Provider) ([]string, error) { opt := &github.ListOptions{PerPage: v.PaginedNumber} repoURLs := []string{} for { - repoList, resp, err := v.Client().Apps.ListRepos(ctx, opt) + repoList, resp, err := wrapAPI(v, "list_app_repos", func() (*github.ListRepositories, *github.Response, error) { + return v.Client().Apps.ListRepos(ctx, opt) + }) if err != nil { return []string{}, err } @@ -594,7 +609,9 @@ func ListRepos(ctx context.Context, v *Provider) ([]string, error) { func (v *Provider) CreateToken(ctx context.Context, repository []string, event *info.Event) (string, error) { for _, r := range repository { split := strings.Split(r, "/") - infoData, _, err := v.Client().Repositories.Get(ctx, split[0], split[1]) + infoData, _, err := wrapAPI(v, "get_repository", func() (*github.Repository, *github.Response, error) { + return v.Client().Repositories.Get(ctx, split[0], split[1]) + }) if err != nil { v.Logger.Warn("we have an invalid repository: `%s` or no access to it: %v", r, err) continue @@ -630,7 +647,9 @@ func (v *Provider) isHeadCommitOfBranch(ctx context.Context, runevent *info.Even "exiting... (hint: did you forget setting a secret on your repo?)") } - branchInfo, _, err := v.Client().Repositories.GetBranch(ctx, runevent.Organization, runevent.Repository, branchName, 1) + branchInfo, _, err := wrapAPI(v, "get_branch", func() (*github.Branch, *github.Response, error) { + return v.Client().Repositories.GetBranch(ctx, runevent.Organization, runevent.Repository, branchName, 1) + }) if err != nil { return err } @@ -656,11 +675,13 @@ func (v *Provider) CreateComment(ctx context.Context, event *info.Event, commit, // List last page of the comments of the PR if updateMarker != "" { - comments, _, err := v.Client().Issues.ListComments(ctx, event.Organization, event.Repository, event.PullRequestNumber, &github.IssueListCommentsOptions{ - ListOptions: github.ListOptions{ - Page: 1, - PerPage: 100, - }, + comments, _, err := wrapAPI(v, "list_comments", func() ([]*github.IssueComment, *github.Response, error) { + return v.Client().Issues.ListComments(ctx, event.Organization, event.Repository, event.PullRequestNumber, &github.IssueListCommentsOptions{ + ListOptions: github.ListOptions{ + Page: 1, + PerPage: 100, + }, + }) }) if err != nil { return err @@ -669,16 +690,22 @@ func (v *Provider) CreateComment(ctx context.Context, event *info.Event, commit, re := regexp.MustCompile(regexp.QuoteMeta(updateMarker)) for _, comment := range comments { if re.MatchString(comment.GetBody()) { - _, _, err := v.Client().Issues.EditComment(ctx, event.Organization, event.Repository, comment.GetID(), &github.IssueComment{ - Body: &commit, - }) - return err + if _, _, err := wrapAPI(v, "edit_comment", func() (*github.IssueComment, *github.Response, error) { + return v.Client().Issues.EditComment(ctx, event.Organization, event.Repository, comment.GetID(), &github.IssueComment{ + Body: &commit, + }) + }); err != nil { + return err + } + return nil } } } - _, _, err := v.Client().Issues.CreateComment(ctx, event.Organization, event.Repository, event.PullRequestNumber, &github.IssueComment{ - Body: &commit, + _, _, err := wrapAPI(v, "create_comment", func() (*github.IssueComment, *github.Response, error) { + return v.Client().Issues.CreateComment(ctx, event.Organization, event.Repository, event.PullRequestNumber, &github.IssueComment{ + Body: &commit, + }) }) return err } diff --git a/pkg/provider/github/parse_payload.go b/pkg/provider/github/parse_payload.go index 0e05ccca5..b91e816fb 100644 --- a/pkg/provider/github/parse_payload.go +++ b/pkg/provider/github/parse_payload.go @@ -233,7 +233,9 @@ func (v *Provider) getPullRequestsWithCommit(ctx context.Context, sha, org, repo for { // Use the "List pull requests associated with a commit" API to check if the commit is part of any open PR - prs, resp, err := v.Client().PullRequests.ListPullRequestsWithCommit(ctx, org, repo, sha, opts) + prs, resp, err := wrapAPI(v, "list_pull_requests_with_commit", func() ([]*github.PullRequest, *github.Response, error) { + return v.Client().PullRequests.ListPullRequestsWithCommit(ctx, org, repo, sha, opts) + }) if err != nil { // Log the error for debugging purposes v.Logger.Debugf("Failed to list pull requests for commit %s in %s/%s: %v", sha, org, repo, err) diff --git a/pkg/provider/github/profiler.go b/pkg/provider/github/profiler.go new file mode 100644 index 000000000..8a87d81c8 --- /dev/null +++ b/pkg/provider/github/profiler.go @@ -0,0 +1,149 @@ +package github + +import ( + "fmt" + "net/http" + "strconv" + "time" + + "github.com/google/go-github/v71/github" + providerMetrics "github.com/openshift-pipelines/pipelines-as-code/pkg/provider/metrics" +) + +const ( + // Rate limit warning thresholds. + rateLimitCritical = 50 // Warn when remaining calls < 50 + rateLimitWarning = 100 // Warn when remaining calls < 100 + rateLimitInfo = 500 // Info when remaining calls < 500 +) + +// checkRateLimit monitors GitHub API rate limits and logs warnings when limits are running low. +func (v *Provider) checkRateLimit(resp *github.Response) (remaining string) { + if resp == nil || resp.Response == nil { + return "" + } + + // Extract rate limit information from headers (using canonical header keys) + values := resp.Header[http.CanonicalHeaderKey("X-RateLimit-Remaining")] + if len(values) == 0 { + return "" + } + + remaining = values[0] + if remaining == "" { + return remaining + } + + // Parse remaining count + remainingCount, err := strconv.Atoi(remaining) + if err != nil { + v.Logger.Debugf("Failed to parse rate limit remaining: %s", remaining) + return remaining + } + + // Get additional rate limit context + limit := "" + reset := "" + if limitValues := resp.Header[http.CanonicalHeaderKey("X-RateLimit-Limit")]; len(limitValues) > 0 { + limit = limitValues[0] + } + if resetValues := resp.Header[http.CanonicalHeaderKey("X-RateLimit-Reset")]; len(resetValues) > 0 { + reset = resetValues[0] + // Convert Unix timestamp to human readable time + if resetTimestamp, err := strconv.ParseInt(reset, 10, 64); err == nil { + resetTime := time.Unix(resetTimestamp, 0) + reset = fmt.Sprintf("%s (%s)", reset, resetTime.Format("15:04:05 MST")) + } + } + + repoName := "" + if v.repo != nil { + repoName = fmt.Sprintf("%s/%s", v.repo.Namespace, v.repo.Name) + } + + // Log warnings based on thresholds + logFields := []any{ + "repo", repoName, + "remaining", remainingCount, + "limit", limit, + "reset", reset, + } + switch { + case remainingCount < rateLimitCritical: + v.Logger.Errorw("GitHub API rate limit critically low", logFields...) + case remainingCount < rateLimitWarning: + v.Logger.Warnw("GitHub API rate limit running low", logFields...) + case remainingCount < rateLimitInfo: + v.Logger.Infow("GitHub API rate limit moderate", logFields...) + } + + return remaining +} + +// wrapAPI wraps a GitHub API call with logging, metrics, and operation context. +func wrapAPI[T any](v *Provider, operation string, call func() (T, *github.Response, error)) (T, *github.Response, error) { + // This check ensures we only profile if a logger is available. + if v.Logger == nil { + return call() + } + + start := time.Now() + data, resp, err := call() + duration := time.Since(start) + + v.logAPICall(operation, duration, resp, err) + + return data, resp, err +} + +func (v *Provider) logAPICall(operation string, duration time.Duration, resp *github.Response, err error) { + providerMetrics.RecordAPIUsage(v.Logger, v.providerName, v.triggerEvent, v.repo) + + // Build structured logging context + logFields := []any{ + "operation", operation, + "duration_ms", duration.Milliseconds(), + "provider", "github", + } + + // Add repository context if available + if v.repo != nil { + logFields = append(logFields, "repo", fmt.Sprintf("%s/%s", v.repo.Namespace, v.repo.Name)) + } + + // Add response context if available + if resp != nil { + remaining := v.checkRateLimit(resp) + logFields = append(logFields, + "url_path", resp.Request.URL.Path, + "rate_limit_remaining", remaining, + ) + if resp.StatusCode > 0 { + logFields = append(logFields, "status_code", resp.StatusCode) + } + } + + // Log based on success/failure with appropriate level + if err != nil { + logFields = append(logFields, "error", err.Error()) + v.Logger.Errorw("GitHub API call failed", logFields...) + } else { + v.Logger.Debugw("GitHub API call completed", logFields...) + } +} + +// wrapAPIGetContents wraps the GetContents API call with operation context. +func wrapAPIGetContents(v *Provider, operation string, call func() (*github.RepositoryContent, []*github.RepositoryContent, *github.Response, error)) (*github.RepositoryContent, []*github.RepositoryContent, *github.Response, error) { + // This check ensures we only profile if a logger is available. + if v.Logger == nil { + return call() + } + + start := time.Now() + file, dir, resp, err := call() + duration := time.Since(start) + + v.logAPICall(operation, duration, resp, err) + + return file, dir, resp, err +} diff --git a/pkg/provider/github/profiler_test.go b/pkg/provider/github/profiler_test.go new file mode 100644 index 000000000..63c338797 --- /dev/null +++ b/pkg/provider/github/profiler_test.go @@ -0,0 +1,353 @@ +package github + +import ( + "fmt" + "net/http" + "net/url" + "strings" + "testing" + + "github.com/google/go-github/v71/github" + "github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + "gotest.tools/v3/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestWrapAPI(t *testing.T) { + // Test case: Logger is nil + t.Run("Logger is nil", func(t *testing.T) { + p := &Provider{} + called := false + call := func() (string, *github.Response, error) { + called = true + return "test", nil, nil + } + + data, resp, err := wrapAPI(p, "test_api_call", call) + assert.Assert(t, called, "Original call should be made") + assert.Equal(t, "test", data) + assert.Assert(t, resp == nil) + assert.Assert(t, err == nil) + }) + + // Test case: Logger is not nil + t.Run("Logger is not nil", func(t *testing.T) { + observedZapCore, observedLogs := observer.New(zap.DebugLevel) + observedLogger := zap.New(observedZapCore).Sugar() + p := &Provider{ + Logger: observedLogger, + providerName: "github", + triggerEvent: "pull_request", + repo: &v1alpha1.Repository{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test-ns", + Name: "test-repo", + }, + }, + } + called := false + reqURL, _ := url.Parse("https://api.github.com/test") + headers := http.Header{} + headers.Set("X-RateLimit-Remaining", "99") + resp := &github.Response{ + Response: &http.Response{ + Request: &http.Request{URL: reqURL}, + Header: headers, + }, + } + call := func() (string, *github.Response, error) { + called = true + return "data", resp, fmt.Errorf("error") + } + + data, r, e := wrapAPI(p, "test_api_call", call) + + assert.Assert(t, called) + assert.Equal(t, "data", data) + assert.Equal(t, r, resp) + assert.Error(t, e, "error") + + logs := observedLogs.All() + assert.Assert(t, len(logs) > 0, "Should have log entries") + + // Find the API call log entry + var apiCallLog *observer.LoggedEntry + for i := range logs { + if logs[i].Message == "GitHub API call failed" { + apiCallLog = &logs[i] + break + } + } + assert.Assert(t, apiCallLog != nil, "Should have API call failed log entry") + + // Check structured fields + foundOperation := false + foundProvider := false + foundRepo := false + for _, field := range apiCallLog.Context { + switch field.Key { + case "operation": + assert.Equal(t, "test_api_call", field.String) + foundOperation = true + case "provider": + assert.Equal(t, "github", field.String) + foundProvider = true + case "repo": + assert.Equal(t, "test-ns/test-repo", field.String) + foundRepo = true + } + } + + assert.Assert(t, foundOperation, "Should have operation field") + assert.Assert(t, foundProvider, "Should have provider field") + assert.Assert(t, foundRepo, "Should have repo field") + }) + + // Test case: Logger is not nil and response is nil + t.Run("Logger is not nil, response is nil", func(t *testing.T) { + observedZapCore, observedLogs := observer.New(zap.DebugLevel) + observedLogger := zap.New(observedZapCore).Sugar() + p := &Provider{ + Logger: observedLogger, + providerName: "github", + triggerEvent: "pull_request", + } + call := func() (string, *github.Response, error) { + return "data", nil, nil + } + + _, _, _ = wrapAPI(p, "test_api_call", call) + + logs := observedLogs.All() + assert.Assert(t, len(logs) > 0, "Should have log entries") + + // Find the API call log entry + var apiCallLog *observer.LoggedEntry + for i := range logs { + if logs[i].Message == "GitHub API call completed" { + apiCallLog = &logs[i] + break + } + } + assert.Assert(t, apiCallLog != nil, "Should have API call completed log entry") + + // Check that it has operation but no URL/rate limit info + foundOperation := false + foundURL := false + foundRateLimit := false + for _, field := range apiCallLog.Context { + switch field.Key { + case "operation": + foundOperation = true + case "url_path": + foundURL = true + case "rate_limit_remaining": + foundRateLimit = true + } + } + + assert.Assert(t, foundOperation, "Should have operation field") + assert.Assert(t, !foundURL, "Should not have URL field when response is nil") + assert.Assert(t, !foundRateLimit, "Should not have rate limit field when response is nil") + }) +} + +func TestRateLimitWarnings(t *testing.T) { + tests := []struct { + name string + remaining string + limit string + reset string + expectedLogLevel zapcore.Level + expectedMessage string + }{ + { + name: "Critical rate limit", + remaining: "25", + limit: "5000", + reset: "1672531200", // 2023-01-01 00:00:00 UTC + expectedLogLevel: zap.ErrorLevel, + expectedMessage: "GitHub API rate limit critically low", + }, + { + name: "Warning rate limit", + remaining: "75", + limit: "5000", + reset: "1672531200", + expectedLogLevel: zap.WarnLevel, + expectedMessage: "GitHub API rate limit running low", + }, + { + name: "Info rate limit", + remaining: "300", + limit: "5000", + reset: "1672531200", + expectedLogLevel: zap.InfoLevel, + expectedMessage: "GitHub API rate limit moderate", + }, + { + name: "Normal rate limit", + remaining: "1000", + limit: "5000", + reset: "1672531200", + expectedMessage: "", // No message expected + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + observedZapCore, observedLogs := observer.New(zap.DebugLevel) + observedLogger := zap.New(observedZapCore).Sugar() + p := &Provider{ + Logger: observedLogger, + providerName: "github", + triggerEvent: "pull_request", + repo: &v1alpha1.Repository{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test-ns", + Name: "test-repo", + }, + }, + } + + reqURL, _ := url.Parse("https://api.github.com/test") + headers := http.Header{} + headers.Set("X-RateLimit-Remaining", tt.remaining) + if tt.limit != "" { + headers.Set("X-RateLimit-Limit", tt.limit) + } + if tt.reset != "" { + headers.Set("X-RateLimit-Reset", tt.reset) + } + + resp := &github.Response{ + Response: &http.Response{ + Request: &http.Request{URL: reqURL}, + Header: headers, + }, + } + + call := func() (string, *github.Response, error) { + return "data", resp, nil + } + + _, _, _ = wrapAPI(p, "test_api_call", call) + + if tt.expectedMessage == "" { + for _, log := range observedLogs.All() { + assert.Assert(t, !strings.Contains(log.Message, "rate limit")) + } + return + } + + var rateLimitLog *observer.LoggedEntry + for i := range observedLogs.All() { + if strings.Contains(observedLogs.All()[i].Message, "rate limit") { + rateLimitLog = &observedLogs.All()[i] + break + } + } + + assert.Assert(t, rateLimitLog != nil, "expected log message not found") + assert.Equal(t, tt.expectedLogLevel, rateLimitLog.Level) + assert.Equal(t, tt.expectedMessage, rateLimitLog.Message) + }) + } +} + +func TestWrapGetContents(t *testing.T) { + // Test case: Logger is nil + t.Run("Logger is nil", func(t *testing.T) { + p := &Provider{} + called := false + call := func() (*github.RepositoryContent, []*github.RepositoryContent, *github.Response, error) { + called = true + return nil, nil, nil, nil + } + + _, _, _, _ = wrapAPIGetContents(p, "get_contents", call) + assert.Assert(t, called, "Original call should be made") + }) + + // Test case: Logger is not nil + t.Run("Logger is not nil", func(t *testing.T) { + observedZapCore, observedLogs := observer.New(zap.DebugLevel) + observedLogger := zap.New(observedZapCore).Sugar() + p := &Provider{ + Logger: observedLogger, + providerName: "github", + triggerEvent: "pull_request", + repo: &v1alpha1.Repository{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "test-ns", + Name: "test-repo", + }, + }, + } + reqURL, _ := url.Parse("https://api.github.com/contents") + headers := http.Header{} + headers.Set("X-RateLimit-Remaining", "42") + resp := &github.Response{ + Response: &http.Response{ + Request: &http.Request{URL: reqURL}, + Header: headers, + }, + } + fileContent := &github.RepositoryContent{Name: github.Ptr("file")} + dirContent := []*github.RepositoryContent{{Name: github.Ptr("dir_file")}} + + call := func() (*github.RepositoryContent, []*github.RepositoryContent, *github.Response, error) { + return fileContent, dirContent, resp, fmt.Errorf("contents error") + } + + file, dir, r, e := wrapAPIGetContents(p, "get_contents", call) + + assert.Equal(t, file, fileContent) + assert.Equal(t, len(dir), 1) + assert.Equal(t, r, resp) + assert.Error(t, e, "contents error") + + logs := observedLogs.All() + assert.Assert(t, len(logs) > 0, "Should have log entries") + + // Find the API call log entry + var apiCallLog *observer.LoggedEntry + for i := range logs { + if logs[i].Message == "GitHub API call failed" { + apiCallLog = &logs[i] + break + } + } + assert.Assert(t, apiCallLog != nil, "Should have API call failed log entry") + + // Check structured fields + foundOperation := false + foundProvider := false + foundRepo := false + foundRateLimit := false + for _, field := range apiCallLog.Context { + switch field.Key { + case "operation": + assert.Equal(t, "get_contents", field.String) + foundOperation = true + case "provider": + assert.Equal(t, "github", field.String) + foundProvider = true + case "repo": + assert.Equal(t, "test-ns/test-repo", field.String) + foundRepo = true + case "rate_limit_remaining": + assert.Equal(t, "42", field.String) + foundRateLimit = true + } + } + + assert.Assert(t, foundOperation, "Should have operation field") + assert.Assert(t, foundProvider, "Should have provider field") + assert.Assert(t, foundRepo, "Should have repo field") + assert.Assert(t, foundRateLimit, "Should have rate limit field") + }) +} diff --git a/pkg/provider/github/status.go b/pkg/provider/github/status.go index bbd466151..b088eb697 100644 --- a/pkg/provider/github/status.go +++ b/pkg/provider/github/status.go @@ -43,11 +43,13 @@ const taskStatusTemplate = ` func (v *Provider) getExistingCheckRunID(ctx context.Context, runevent *info.Event, status provider.StatusOpts) (*int64, error) { opt := github.ListOptions{PerPage: v.PaginedNumber} for { - res, resp, err := v.Client().Checks.ListCheckRunsForRef(ctx, runevent.Organization, runevent.Repository, - runevent.SHA, &github.ListCheckRunsOptions{ - AppID: v.ApplicationID, - ListOptions: opt, - }) + res, resp, err := wrapAPI(v, "list_check_runs_for_ref", func() (*github.ListCheckRunsResults, *github.Response, error) { + return v.Client().Checks.ListCheckRunsForRef(ctx, runevent.Organization, runevent.Repository, + runevent.SHA, &github.ListCheckRunsOptions{ + AppID: v.ApplicationID, + ListOptions: opt, + }) + }) if err != nil { return nil, err } @@ -118,7 +120,9 @@ func (v *Provider) createCheckRunStatus(ctx context.Context, runevent *info.Even StartedAt: &now, } - checkRun, _, err := v.Client().Checks.CreateCheckRun(ctx, runevent.Organization, runevent.Repository, checkrunoption) + checkRun, _, err := wrapAPI(v, "create_check_run", func() (*github.CheckRun, *github.Response, error) { + return v.Client().Checks.CreateCheckRun(ctx, runevent.Organization, runevent.Repository, checkrunoption) + }) if err != nil { return nil, err } @@ -270,7 +274,9 @@ func (v *Provider) getOrUpdateCheckRunStatus(ctx context.Context, runevent *info opts.Conclusion = github.Ptr("cancelled") } - _, _, err = v.Client().Checks.UpdateCheckRun(ctx, runevent.Organization, runevent.Repository, *checkRunID, opts) + _, _, err = wrapAPI(v, "update_check_run", func() (*github.CheckRun, *github.Response, error) { + return v.Client().Checks.UpdateCheckRun(ctx, runevent.Organization, runevent.Repository, *checkRunID, opts) + }) return err } @@ -323,8 +329,10 @@ func (v *Provider) createStatusCommit(ctx context.Context, runevent *info.Event, CreatedAt: &github.Timestamp{Time: now}, } - if _, _, err := v.Client().Repositories.CreateStatus(ctx, - runevent.Organization, runevent.Repository, runevent.SHA, ghstatus); err != nil { + if _, _, err := wrapAPI(v, "create_status", func() (*github.RepoStatus, *github.Response, error) { + return v.Client().Repositories.CreateStatus(ctx, + runevent.Organization, runevent.Repository, runevent.SHA, ghstatus) + }); err != nil { return err } eventType := triggertype.IsPullRequestType(runevent.EventType) @@ -344,12 +352,14 @@ func (v *Provider) createStatusCommit(ctx context.Context, runevent *info.Event, default: if (status.Status == "completed" || (status.Status == "queued" && status.Title == pendingApproval)) && status.Text != "" && eventType == triggertype.PullRequest { - _, _, err = v.Client().Issues.CreateComment(ctx, runevent.Organization, runevent.Repository, - runevent.PullRequestNumber, - &github.IssueComment{ - Body: github.Ptr(fmt.Sprintf("%s
%s", status.Summary, status.Text)), - }, - ) + _, _, err = wrapAPI(v, "create_issue_comment", func() (*github.IssueComment, *github.Response, error) { + return v.Client().Issues.CreateComment(ctx, runevent.Organization, runevent.Repository, + runevent.PullRequestNumber, + &github.IssueComment{ + Body: github.Ptr(fmt.Sprintf("%s
%s", status.Summary, status.Text)), + }, + ) + }) if err != nil { return err } diff --git a/test/README.md b/test/README.md index a3e2c7ea4..aa8c98fd7 100644 --- a/test/README.md +++ b/test/README.md @@ -58,6 +58,8 @@ this repo should differ from the one which is configured as part of `TEST_GITHUB - `TEST_BITBUCKET_SERVER_API_URL` - URL where your Bitbucket Data Center instance is running. - `TEST_BITBUCKET_SERVER_WEBHOOK_SECRET` - Webhook secret +- `PAC_API_INSTRUMENTATION_DIR` - Optional. When set, E2E tests write per-test JSON reports of GitHub API calls parsed from controller logs to this directory. Useful for analyzing API usage and rate limits. Example: `export PAC_API_INSTRUMENTATION_DIR=/tmp/api-instrumentation`. + You don't need to configure all of those if you restrict running your e2e tests to a subset. ## Running @@ -155,6 +157,57 @@ The `hack/gh-workflow-ci.sh` script contains several functions that assist in th The script filters tests by category using pattern matching on test function names. +> [!NOTE] +> For details on how API call metrics are generated and archived as artifacts, see [API Instrumentation (optional)](#api-instrumentation-optional). + +### API Instrumentation (optional) + +To help debug and analyze GitHub API usage during E2E runs, tests can emit +structured JSON reports of API calls when the environment variable +`PAC_API_INSTRUMENTATION_DIR` is set. + +> [!NOTE] +> Currently supported only for GitHub (both GitHub App and GitHub webhook flows). Support for other providers is planned. + +- Set `PAC_API_INSTRUMENTATION_DIR` to a writable path before running tests, +for example: + - `export PAC_API_INSTRUMENTATION_DIR=/tmp/api-instrumentation` +- Each test produces a file named like `YYYY-MM-DDTHH-MM-SS_.json` containing summary fields and an array of API calls (operation, duration_ms, url_path, status_code, rate_limit_remaining, provider, repo). +- In CI, this variable defaults to `/tmp/api-instrumentation` and `hack/gh-workflow-ci.sh collect_logs` copies the directory into the uploaded artifacts. + +Log source details: + +- Parses controller pod logs from the `pac-controller` container. +- Uses label selector `app.kubernetes.io/name=controller` (or `ghe-controller` when testing against GHE). +- Considers only log lines after the last occurrence of `github-app: initialized OAuth2 client`. +- Matches lines containing `GitHub API call completed` and extracts the embedded JSON payload. + +Sample output: + +```json +{ + "test_name": "TestGithubAppSimple", + "timestamp": "2025-08-05T16:12:20Z", + "controller": "controller", + "pr_number": 123, + "sha": "abcdef1", + "target_namespace": "pac-e2e-ns-xyz12", + "total_calls": 2, + "oauth2_marker_line": 42, + "github_api_calls": [ + { + "operation": "get_commit", + "duration_ms": 156, + "url_path": "/api/v3/repos/org/repo/git/commits/62a0...", + "rate_limit_remaining": "", + "status_code": 200, + "provider": "github", + "repo": "org/repo" + } + ] +} +``` + ### Test Execution Flow 1. **Setup**: @@ -176,6 +229,7 @@ The script filters tests by category using pattern matching on test function nam - Collect logs regardless of test outcome - Detect any panic in the controller logs - Upload artifacts to GitHub Actions + - If `PAC_API_INSTRUMENTATION_DIR` is set, include API instrumentation reports (see [API Instrumentation (optional)](#api-instrumentation-optional)) ### Debugging CI diff --git a/test/pkg/github/instrumentation.go b/test/pkg/github/instrumentation.go new file mode 100644 index 000000000..7a80e3d59 --- /dev/null +++ b/test/pkg/github/instrumentation.go @@ -0,0 +1,296 @@ +package github + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + "testing" + "time" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// InstrumentationAPICall represents a single GitHub API call with its metadata. +type InstrumentationAPICall struct { + Operation string `json:"operation"` + DurationMs int64 `json:"duration_ms"` + URLPath string `json:"url_path"` + RateLimitRemaining string `json:"rate_limit_remaining"` + StatusCode int `json:"status_code"` + Provider string `json:"provider"` + Repo string `json:"repo"` +} + +// TestResult represents the complete test result with GitHub API calls. +type TestResult struct { + TestName string `json:"test_name"` + Timestamp string `json:"timestamp"` + Controller string `json:"controller"` + PRNumber int `json:"pr_number"` + SHA string `json:"sha"` + TargetNamespace string `json:"target_namespace"` + GitHubAPICalls []InstrumentationAPICall `json:"github_api_calls"` + TotalCalls int `json:"total_calls"` + OAuth2MarkerLine int `json:"oauth2_marker_line"` +} + +// collectGitHubAPICalls collects GitHub API calls from the controller logs. +// It retrieves logs from the controller pod, searches for the last OAuth2 initialization marker, +// the Oauth2 marker shows even if the controller is not using github app, so +// we can use it to find the start of the API calls. +// TODO(chmouel): Fix Oauth2 marker to be more specific to GitHub App usage and +// not github webhook. use another marker for github webhook. +// TODO(chmouel): Add support for Gitlab. +// TODO(chmouel): Add support for Bitbucket. +// TODO(chmouel): Add support for Gitea. +func (g *PRTest) collectGitHubAPICalls(ctx context.Context, _ *testing.T) { + numLines := int64(100) + controllerName := "controller" + if g.SecondController { + controllerName = "ghe-controller" + } + labelselector := fmt.Sprintf("app.kubernetes.io/name=%s", controllerName) + containerName := "pac-controller" + + g.Logger.Infof("Attempting to collect GitHub API calls from controller: %s", controllerName) + + // Get controller logs using the existing pattern + logs, err := g.Cnx.Clients.Kube.CoreV1().Pods("").List(ctx, metav1.ListOptions{ + LabelSelector: labelselector, + }) + if err != nil { + g.Logger.Warnf("Failed to get controller pods with label selector '%s': %v", labelselector, err) + return + } + + g.Logger.Infof("Found %d pods with label selector '%s'", len(logs.Items), labelselector) + + if len(logs.Items) == 0 { + g.Logger.Warnf("No controller pods found with label selector: %s", labelselector) + // Try to list all pods to see what's available + allPods, err := g.Cnx.Clients.Kube.CoreV1().Pods("").List(ctx, metav1.ListOptions{}) + if err != nil { + g.Logger.Warnf("Failed to list all pods: %v", err) + return + } + g.Logger.Infof("Available pods: %d total", len(allPods.Items)) + for _, pod := range allPods.Items { + if strings.Contains(pod.Name, "controller") { + g.Logger.Infof(" - %s (namespace: %s, labels: %v)", pod.Name, pod.Namespace, pod.Labels) + } + } + return + } + + // Log details about the found pods + for i, pod := range logs.Items { + g.Logger.Infof("Pod %d: %s (namespace: %s, status: %s)", i+1, pod.Name, pod.Namespace, pod.Status.Phase) + } + + // Get logs from the first controller pod + selectedPod := logs.Items[0] + g.Logger.Infof("Attempting to get logs from pod: %s in namespace: %s", selectedPod.Name, selectedPod.Namespace) + + // Try to get logs with more specific options + podLogs, err := g.Cnx.Clients.Kube.CoreV1().Pods(selectedPod.Namespace).GetLogs(selectedPod.Name, &corev1.PodLogOptions{ + Container: containerName, + TailLines: &numLines, + Previous: false, // Don't get previous container logs + }).Do(ctx).Raw() + if err != nil { + g.Logger.Warnf("Failed to get controller logs from pod %s: %v", selectedPod.Name, err) + + // Try to get pod details to understand the issue + podDetails, err := g.Cnx.Clients.Kube.CoreV1().Pods(selectedPod.Namespace).Get(ctx, selectedPod.Name, metav1.GetOptions{}) + if err != nil { + g.Logger.Warnf("Failed to get pod details for %s: %v", selectedPod.Name, err) + } else { + g.Logger.Infof("Pod %s status: %s, containers: %v", selectedPod.Name, podDetails.Status.Phase, podDetails.Status.ContainerStatuses) + + // Check if the container exists and is ready + for _, container := range podDetails.Status.ContainerStatuses { + if container.Name == containerName { + g.Logger.Infof("Container %s ready: %v, restart count: %d", container.Name, container.Ready, container.RestartCount) + } + } + } + + // Try alternative approach - get logs without specifying container + g.Logger.Infof("Trying to get logs without specifying container name...") + podLogs, err = g.Cnx.Clients.Kube.CoreV1().Pods(selectedPod.Namespace).GetLogs(selectedPod.Name, &corev1.PodLogOptions{ + TailLines: &numLines, + }).Do(ctx).Raw() + if err != nil { + g.Logger.Warnf("Failed to get logs without container specification: %v", err) + return + } + g.Logger.Infof("Successfully got logs without container specification") + } + + logContent := string(podLogs) + logLines := strings.Split(logContent, "\n") + g.Logger.Infof("Collected %d lines from controller logs", len(logLines)) + + // Find the last OAuth2 initialization marker + oauth2Marker := "github-app: initialized OAuth2 client" + lastOAuth2Index := -1 + for i := len(logLines) - 1; i >= 0; i-- { + if strings.Contains(logLines[i], oauth2Marker) { + lastOAuth2Index = i + break + } + } + + if lastOAuth2Index == -1 { + g.Logger.Infof("OAuth2 initialization marker not found in the last %d lines", numLines) + return + } + + g.Logger.Infof("Found OAuth2 initialization marker at line %d (from end)", len(logLines)-lastOAuth2Index) + + // Extract GitHub API calls only from the last OAuth2 marker onwards + apiCallRegex := regexp.MustCompile(`GitHub API call completed.*`) + var apiCalls []string + + for i := lastOAuth2Index; i < len(logLines); i++ { + line := logLines[i] + if apiCallRegex.MatchString(line) { + apiCalls = append(apiCalls, strings.TrimSpace(line)) + } + } + + g.Logger.Infof("Found %d GitHub API calls after the last OAuth2 initialization", len(apiCalls)) + + // Log each API call for debugging + for i, match := range apiCalls { + g.Logger.Infof("API Call %d: %s", i+1, match) + } + + // Check if we should output to file + outputDir := os.Getenv("PAC_API_INSTRUMENTATION_DIR") + if outputDir != "" { + g.outputTestResultToFile(outputDir, apiCalls, lastOAuth2Index, len(logLines)) + } +} + +// sanitizeFilename converts a test name to a safe filename. +func sanitizeFilename(name string) string { + // Replace spaces and special characters with underscores + re := regexp.MustCompile(`[^a-zA-Z0-9._-]`) + sanitized := re.ReplaceAllString(name, "_") + // Convert to lowercase + return strings.ToLower(sanitized) +} + +// parseAPICallLog parses a GitHub API call log line and extracts structured data. +func parseAPICallLog(logLine string) *InstrumentationAPICall { + // The log line contains a JSON object, so we need to extract and parse it + // Example: "API Call 1: {\"level\":\"debug\",\"ts\":\"2025-08-05T16:12:17.508Z\",...}" + + // Find the JSON part after "API Call X: " + jsonStart := strings.Index(logLine, "{") + if jsonStart == -1 { + return nil + } + + jsonStr := logLine[jsonStart:] + + // Parse the JSON object + var logEntry map[string]interface{} + if err := json.Unmarshal([]byte(jsonStr), &logEntry); err != nil { + return nil + } + + call := &InstrumentationAPICall{} + + // Extract fields from the JSON + if operation, ok := logEntry["operation"].(string); ok { + call.Operation = operation + } + + if durationMs, ok := logEntry["duration_ms"].(float64); ok { + call.DurationMs = int64(durationMs) + } + + if urlPath, ok := logEntry["url_path"].(string); ok { + call.URLPath = urlPath + } + + if rateLimit, ok := logEntry["rate_limit_remaining"].(string); ok { + call.RateLimitRemaining = rateLimit + } + + if statusCode, ok := logEntry["status_code"].(float64); ok { + call.StatusCode = int(statusCode) + } + + if provider, ok := logEntry["provider"].(string); ok { + call.Provider = provider + } + + if repo, ok := logEntry["repo"].(string); ok { + call.Repo = repo + } + + return call +} + +// outputTestResultToFile writes the test result to a JSON file. +func (g *PRTest) outputTestResultToFile(outputDir string, apiCalls []string, lastOAuth2Index, totalLines int) { + // Create output directory if it doesn't exist + if err := os.MkdirAll(outputDir, 0o755); err != nil { + g.Logger.Warnf("Failed to create output directory %s: %v", outputDir, err) + return + } + + // Parse API calls into structured data + var structuredCalls []InstrumentationAPICall + for _, call := range apiCalls { + if parsed := parseAPICallLog(call); parsed != nil { + structuredCalls = append(structuredCalls, *parsed) + } + } + + // Create test result + controllerName := "controller" + if g.SecondController { + controllerName = "ghe-controller" + } + + result := TestResult{ + TestName: g.Label, + Timestamp: time.Now().Format(time.RFC3339), + Controller: controllerName, + PRNumber: g.PRNumber, + SHA: g.SHA, + TargetNamespace: g.TargetNamespace, + GitHubAPICalls: structuredCalls, + TotalCalls: len(structuredCalls), + OAuth2MarkerLine: totalLines - lastOAuth2Index, + } + + // Generate filename + timestamp := time.Now().Format("2006-01-02T15-04-05") + sanitizedTestName := sanitizeFilename(g.Label) + filename := fmt.Sprintf("%s_%s.json", timestamp, sanitizedTestName) + filepath := filepath.Join(outputDir, filename) + + // Write JSON file + jsonData, err := json.MarshalIndent(result, "", " ") + if err != nil { + g.Logger.Warnf("Failed to marshal test result to JSON: %v", err) + return + } + + if err := os.WriteFile(filepath, jsonData, 0o600); err != nil { + g.Logger.Warnf("Failed to write test result to file %s: %v", filepath, err) + return + } + + g.Logger.Infof("Test result written to: %s", filepath) +} diff --git a/test/pkg/github/instrumentation_test.go b/test/pkg/github/instrumentation_test.go new file mode 100644 index 000000000..967caf97d --- /dev/null +++ b/test/pkg/github/instrumentation_test.go @@ -0,0 +1,91 @@ +package github + +import ( + "testing" +) + +func TestParseAPICallLog(t *testing.T) { + // Test with the real log format from the user's example + testCases := []struct { + name string + logLine string + expected *InstrumentationAPICall + }{ + { + name: "parse valid JSON log line", + logLine: `API Call 1: {"level":"debug","ts":"2025-08-05T16:12:17.508Z","logger":"pipelinesascode","caller":"github/profiler.go:131","msg":"GitHub API call completed","commit":"bacf698","provider":"github","event-id":"f4698b50-7216-11f0-9c6b-443ea2de733f","event-sha":"62a0b25ea7bdc3ef0e8789abff8cd797ab6cac25","event-type":"no-ops-comment","source-repo-url":"https://ghe.pipelinesascode.com/chmouel/e2e-gapps","target-branch":"main","source-branch":"pac-e2e-test-mf7r6","namespace":"pac-e2e-ns-jhh9f","operation":"get_commit","duration_ms":156,"provider":"github","repo":"pac-e2e-ns-jhh9f/pac-e2e-ns-jhh9f","url_path":"/api/v3/repos/chmouel/e2e-gapps/git/commits/62a0b25ea7bdc3ef0e8789abff8cd797ab6cac25","rate_limit_remaining":"","status_code":200}`, + expected: &InstrumentationAPICall{ + Operation: "get_commit", + DurationMs: 156, + URLPath: "/api/v3/repos/chmouel/e2e-gapps/git/commits/62a0b25ea7bdc3ef0e8789abff8cd797ab6cac25", + RateLimitRemaining: "", + StatusCode: 200, + Provider: "github", + Repo: "pac-e2e-ns-jhh9f/pac-e2e-ns-jhh9f", + }, + }, + { + name: "parse log line with rate limit", + logLine: `API Call 2: {"level":"debug","ts":"2025-08-05T16:12:17.665Z","logger":"pipelinesascode","caller":"github/profiler.go:131","msg":"GitHub API call completed","operation":"get_root_tree","duration_ms":157,"provider":"github","repo":"pac-e2e-ns-jhh9f/pac-e2e-ns-jhh9f","url_path":"/api/v3/repos/chmouel/e2e-gapps/git/trees/62a0b25ea7bdc3ef0e8789abff8cd797ab6cac25","rate_limit_remaining":"4999","status_code":200}`, + expected: &InstrumentationAPICall{ + Operation: "get_root_tree", + DurationMs: 157, + URLPath: "/api/v3/repos/chmouel/e2e-gapps/git/trees/62a0b25ea7bdc3ef0e8789abff8cd797ab6cac25", + RateLimitRemaining: "4999", + StatusCode: 200, + Provider: "github", + Repo: "pac-e2e-ns-jhh9f/pac-e2e-ns-jhh9f", + }, + }, + { + name: "parse invalid log line", + logLine: "This is not a valid log line", + expected: nil, + }, + { + name: "parse log line without JSON", + logLine: "API Call 1: This is not JSON", + expected: nil, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + result := parseAPICallLog(tc.logLine) + + if tc.expected == nil { + if result != nil { + t.Errorf("Expected nil result, got %+v", result) + } + return + } + + if result == nil { + t.Errorf("Expected result, got nil") + return + } + + if result.Operation != tc.expected.Operation { + t.Errorf("Expected operation %s, got %s", tc.expected.Operation, result.Operation) + } + if result.DurationMs != tc.expected.DurationMs { + t.Errorf("Expected duration_ms %d, got %d", tc.expected.DurationMs, result.DurationMs) + } + if result.URLPath != tc.expected.URLPath { + t.Errorf("Expected url_path %s, got %s", tc.expected.URLPath, result.URLPath) + } + if result.RateLimitRemaining != tc.expected.RateLimitRemaining { + t.Errorf("Expected rate_limit_remaining %s, got %s", tc.expected.RateLimitRemaining, result.RateLimitRemaining) + } + if result.StatusCode != tc.expected.StatusCode { + t.Errorf("Expected status_code %d, got %d", tc.expected.StatusCode, result.StatusCode) + } + if result.Provider != tc.expected.Provider { + t.Errorf("Expected provider %s, got %s", tc.expected.Provider, result.Provider) + } + if result.Repo != tc.expected.Repo { + t.Errorf("Expected repo %s, got %s", tc.expected.Repo, result.Repo) + } + }) + } +} diff --git a/test/pkg/github/pr.go b/test/pkg/github/pr.go index 796979e75..30955a5ed 100644 --- a/test/pkg/github/pr.go +++ b/test/pkg/github/pr.go @@ -188,6 +188,10 @@ func (g *PRTest) TearDown(ctx context.Context, t *testing.T) { g.Logger.Infof("Not cleaning up and closing PR since TEST_NOCLEANUP is set") return } + + // Collect GitHub API call information from controller logs + g.collectGitHubAPICalls(ctx, t) + g.Logger.Infof("Closing PR %d", g.PRNumber) if g.PRNumber != -1 { state := "closed"