Skip to content

Commit aa098f1

Browse files
Add support for whitelisting bots
Signed-off-by: Lukasz Gryglicki <[email protected]>
1 parent 054a8c8 commit aa098f1

File tree

8 files changed

+288
-8
lines changed

8 files changed

+288
-8
lines changed

cla-backend/cla/models/dynamo_models.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3901,6 +3901,7 @@ class Meta:
39013901
branch_protection_enabled = BooleanAttribute(null=True)
39023902
enabled = BooleanAttribute(null=True)
39033903
note = UnicodeAttribute(null=True)
3904+
skip_cla = MapAttribute(of=UnicodeAttribute, null=True)
39043905

39053906

39063907
class GitHubOrg(model_interfaces.GitHubOrg): # pylint: disable=too-many-public-methods
@@ -3910,7 +3911,7 @@ class GitHubOrg(model_interfaces.GitHubOrg): # pylint: disable=too-many-public-
39103911

39113912
def __init__(
39123913
self, organization_name=None, organization_installation_id=None, organization_sfid=None,
3913-
auto_enabled=False, branch_protection_enabled=False, note=None, enabled=True
3914+
auto_enabled=False, branch_protection_enabled=False, note=None, enabled=True, skip_cla=None,
39143915
):
39153916
super(GitHubOrg).__init__()
39163917
self.model = GitHubOrgModel()
@@ -3923,6 +3924,7 @@ def __init__(
39233924
self.model.branch_protection_enabled = branch_protection_enabled
39243925
self.model.note = note
39253926
self.model.enabled = enabled
3927+
self.model.skip_cla = skip_cla
39263928

39273929
def __str__(self):
39283930
return (
@@ -3933,8 +3935,9 @@ def __str__(self):
39333935
f'organization company id: {self.model.organization_company_id}, '
39343936
f'auto_enabled: {self.model.auto_enabled},'
39353937
f'branch_protection_enabled: {self.model.branch_protection_enabled},'
3936-
f'note: {self.model.note}'
3937-
f'enabled: {self.model.enabled}'
3938+
f'note: {self.model.note},'
3939+
f'enabled: {self.model.enabled},'
3940+
f'skip_cla: {self.model.skip_cla}'
39383941
)
39393942

39403943
def to_dict(self):
@@ -3980,6 +3983,9 @@ def get_auto_enabled(self):
39803983
def get_branch_protection_enabled(self):
    """
    Getter for the branch protection enabled flag.

    :return: the branch_protection_enabled value stored on the underlying model.
    """
    model = self.model
    return model.branch_protection_enabled
39823985

3986+
def get_skip_cla(self):
    """
    Getter for the skip_cla configuration.

    :return: the skip_cla value stored on the underlying model (None when it is not set).
    """
    model = self.model
    return model.skip_cla
3988+
39833989
def get_note(self):
39843990
"""
39853991
Getter for the note.
@@ -4017,6 +4023,9 @@ def set_auto_enabled(self, auto_enabled):
40174023
def set_branch_protection_enabled(self, branch_protection_enabled):
    """
    Setter for the branch protection enabled flag.

    :param branch_protection_enabled: the value to store on the underlying model.
    """
    model = self.model
    model.branch_protection_enabled = branch_protection_enabled
40194025

4026+
def set_skip_cla(self, skip_cla):
    """
    Setter for the skip_cla configuration.

    :param skip_cla: the value to store on the underlying model.
    """
    model = self.model
    model.skip_cla = skip_cla
4028+
40204029
def set_note(self, note):
40214030
self.model.note = note
40224031

cla-backend/cla/models/event_types.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,4 @@ class EventType(Enum):
4848
RepositoryRemoved = "Repository Removed"
4949
RepositoryDisable = "Repository Disabled"
5050
RepositoryEnabled = "Repository Enabled"
51+
BypassCLA = "Bypass CLA"

cla-backend/cla/models/github_models.py

Lines changed: 162 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,21 @@
77
import concurrent.futures
88
import json
99
import os
10+
import re
1011
import base64
1112
import binascii
1213
import threading
1314
import time
1415
import uuid
15-
from typing import List, Optional, Union
16+
from typing import List, Optional, Union, Tuple
1617

1718
import cla
1819
import falcon
1920
import github
2021
from cla.controllers.github_application import GitHubInstallation
2122
from cla.models import DoesNotExist, repository_service_interface
22-
from cla.models.dynamo_models import GitHubOrg, Repository
23+
from cla.models.dynamo_models import GitHubOrg, Repository, Event
24+
from cla.models.event_types import EventType
2325
from cla.user import UserCommitSummary
2426
from cla.utils import (append_project_version_to_url, get_project_instance,
2527
set_active_pr_metadata)
@@ -736,6 +738,11 @@ def update_merge_group(self, installation_id, github_repository_id, merge_group_
736738
for user_commit_summary in commit_authors:
737739
handle_commit_from_user(project, user_commit_summary, signed, missing)
738740

741+
# Skip whitelisted bots per org/repo GitHub login/email regexps
742+
# repo can be defined as '*' (all repos) or re:<regexp> (regexp to match repo name) or 'repo-name' for exact match
743+
# the value is '<login_pattern>;<email_pattern>': a GitHub login match and an email match separated by ';' (each matched the same way: '*' or re:<regexp> or exact-match)
744+
missing, signed = self.skip_whitelisted_bots(github_org, repository.get_repository_name(), missing)
745+
739746
# update Merge group status
740747
self.update_merge_group_status(
741748
installation_id, github_repository_id, pull_request, merge_group_sha, signed, missing, project.get_version()
@@ -896,6 +903,10 @@ def update_change_request(self, installation_id, github_repository_id, change_re
896903
for future in concurrent.futures.as_completed(futures):
897904
cla.log.debug(f"{fn} - ThreadClosed for handle_commit_from_user")
898905

906+
# Skip whitelisted bots per org/repo GitHub login/email regexps
907+
# repo can be defined as '*' (all repos) or re:<regexp> (regexp to match repo name) or 'repo-name' for exact match
908+
# the value is '<login_pattern>;<email_pattern>': a GitHub login match and an email match separated by ';' (each matched the same way: '*' or re:<regexp> or exact-match)
909+
missing, signed = self.skip_whitelisted_bots(github_org, repository.get_repository_name(), missing)
899910
# At this point, the signed and missing lists are now filled and updated with the commit user info
900911

901912
cla.log.debug(
@@ -915,6 +926,155 @@ def update_change_request(self, installation_id, github_repository_id, change_re
915926
project_version=project.get_version(),
916927
)
917928

929+
def property_matches(self, pattern, value):
    """
    Tell whether ``value`` satisfies ``pattern``.

    Matching rules:
    - '*' accepts any value, including None
    - 're:<regexp>' accepts a value that is set and in which the regexp is found (re.search)
    - any other pattern accepts only a value equal to it

    :param pattern: the pattern to apply.
    :param value: the value to test, may be None.
    :return: the match result; False when evaluating the pattern raises.
    """
    try:
        if pattern == '*':
            matched = True
        elif pattern.startswith('re:'):
            # Everything after the 're:' prefix is the regular expression.
            matched = value is not None and re.search(pattern[3:], value) is not None
        else:
            matched = value == pattern
        return matched
    except Exception as exc:
        cla.log.warning("Error in property_matches: pattern=%s, value=%s, exc=%s", pattern, value, exc)
        return False
946+
947+
def is_actor_skipped(self, actor, config):
    """
    Returns True if the actor should be skipped (whitelisted) based on the config pattern.

    :param actor: object whose ``author_login`` and ``author_email`` attributes are matched;
        a missing attribute is treated as None.
    :param config: '<username_pattern>;<email_pattern>' - the two patterns are separated by a
        semicolon (;) and only the first semicolon is used as the separator. Each pattern is
        evaluated with ``property_matches``.
    :return: True only when both the username and the email pattern match; False when the
        config has no ';' separator or when evaluating it raises.
    """
    try:
        username_pattern, separator, email_pattern = config.partition(';')
        if not separator:
            # No ';' in the config: it cannot be split into username/email patterns.
            return False
        username = getattr(actor, "author_login", None)
        email = getattr(actor, "author_email", None)
        return self.property_matches(username_pattern, username) and self.property_matches(email_pattern, email)
    except Exception as exc:
        cla.log.warning("Error in is_actor_skipped: config=%s, actor=%s, exc=%s", config, actor, exc)
        return False
962+
963+
def strip_org(self, repo_full):
    """
    Drop the leading '<org>/' part of a repository name.

    :param repo_full: repository name, with or without an organization prefix.
    :return: everything after the first '/', or the input unchanged when it contains no '/'.
    """
    return repo_full.split('/', 1)[1] if '/' in repo_full else repo_full
967+
968+
def skip_whitelisted_bots(self, org_model, org_repo, actors_missing_cla) -> Tuple[List[UserCommitSummary], List[UserCommitSummary]]:
    """
    Check if the actors are whitelisted based on the skip_cla configuration.
    Returns a tuple of two lists:
    - actors_missing_cla: actors who still need to sign the CLA after checking skip_cla
    - whitelisted_actors: actors who are skipped due to skip_cla configuration

    :param org_model: The GitHub organization model instance (its get_skip_cla() is read).
    :param org_repo: The repository name, either 'repo' or 'org/repo'; the 'org/' prefix is
        stripped before the name is compared with the skip_cla keys.
    :param actors_missing_cla: List of UserCommitSummary objects representing actors who are missing CLA.
    :return: Tuple of (actors_missing_cla, whitelisted_actors)

    In the cla-{stage}-github-orgs table there can be a skip_cla field which is a dict with the
    following structure:
    {
        "repo-name": "<username_pattern>;<email_pattern>",
        "re:repo-regexp": "<username_pattern>;<email_pattern>",
        "*": "<username_pattern>;<email_pattern>"
    }
    where:
    - repo-name is the exact repository name without the organization prefix (e.g., "my-repo")
    - re:repo-regexp is a regex pattern to match repository names (also without the organization prefix)
    - * is a wildcard that applies to all repositories
    - <username_pattern> is a GitHub username pattern (exact match or regex prefixed by re: or match all '*')
    - <email_pattern> is a GitHub email pattern (exact match or regex prefixed by re: or match all '*')

    The configuration is looked up in this order: exact key, first matching 're:' key, then '*'.

    :note: The username and email patterns are separated by a semicolon (;).
    :note: If the skip_cla is not set, it will skip the whitelisted bots check.
    :note: Each whitelisted actor gets ``authorized = True`` and a BypassCLA event is created for it.
    :note: On any unexpected error the input list is returned unchanged with no whitelisted actors.
    """
    try:
        repo = self.strip_org(org_repo)
        skip_cla = org_model.get_skip_cla()
        if skip_cla is None:
            cla.log.debug("skip_cla is not set, skipping whitelisted bots check")
            return actors_missing_cla, []

        # The stored attribute may be a map object rather than a plain dict.
        if hasattr(skip_cla, "as_dict"):
            skip_cla = skip_cla.as_dict()
        config = ''
        # 1. Exact match
        if repo in skip_cla:
            cla.log.debug("skip_cla config found for repo %s: %s (exact hit)", repo, skip_cla[repo])
            config = skip_cla[repo]

        # 2. Regex pattern (if no exact hit)
        if config == '':
            cla.log.debug("No skip_cla config found for repo %s, checking regex patterns", repo)
            for k, v in skip_cla.items():
                if not isinstance(k, str) or not k.startswith("re:"):
                    continue
                pattern = k[3:]
                try:
                    if re.search(pattern, repo):
                        config = v
                        cla.log.debug("Found skip_cla config for repo %s: %s via regex pattern: %s", repo, config, pattern)
                        break
                except re.error as e:
                    # A broken regex key must not disable the remaining keys.
                    cla.log.warning("Invalid regex in skip_cla: %s (%s)", k, e)
                    continue

        # 3. Wildcard fallback
        if config == '' and '*' in skip_cla:
            cla.log.debug("No skip_cla config found for repo %s, using wildcard config", repo)
            config = skip_cla['*']

        # 4. No match
        if config == '':
            cla.log.debug("No skip_cla config found for repo %s, skipping whitelisted bots check", repo)
            return actors_missing_cla, []

        out_actors_missing_cla = []
        whitelisted_actors = []
        for actor in actors_missing_cla:
            try:
                if self.is_actor_skipped(actor, config):
                    actor_data = "id='{}',login='{}',username='{}',email='{}'".format(
                        getattr(actor, "author_id", "(null)"),
                        getattr(actor, "author_login", "(null)"),
                        getattr(actor, "author_username", "(null)"),
                        getattr(actor, "author_email", "(null)"),
                    )
                    msg = "Skipping CLA check for repo='{}', actor: {} due to skip_cla config: '{}'".format(
                        org_repo,
                        actor_data,
                        config,
                    )
                    cla.log.info(msg)
                    # Record the bypass as an event; the created event object is not needed here.
                    Event.create_event(
                        event_type=EventType.BypassCLA,
                        event_data=msg,
                        event_summary=msg,
                        event_user_name=actor_data,
                        contains_pii=True,
                    )
                    actor.authorized = True
                    whitelisted_actors.append(actor)
                    continue
            except Exception as e:
                # A failure for one actor leaves that actor in the "missing" list.
                cla.log.warning(
                    "Error checking skip_cla for actor '%s' (login='%s', email='%s'): %s",
                    actor, getattr(actor, "author_login", None), getattr(actor, "author_email", None), e,
                )
            out_actors_missing_cla.append(actor)

        return out_actors_missing_cla, whitelisted_actors
    except Exception as exc:
        # Only the function arguments are logged here: they are always bound, whereas locals
        # such as the stripped repo name may not exist yet when the failure happened early.
        cla.log.error(
            "Exception in skip_whitelisted_bots: %s (repo=%s, actors=%s). Disabling skip_cla logic for this run.",
            exc, org_repo, actors_missing_cla
        )
        # Always return all actors if something breaks
        return actors_missing_cla, []
1076+
1077+
9181078
def get_pull_request(self, github_repository_id, pull_request_number, installation_id):
9191079
"""
9201080
Helper method to get the pull request object from GitHub.

cla-backend/cla/routes.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,9 @@ def user_from_session(request, response):
18411841
Will return 200 and user data if there is an active GitHub session
18421842
Can return 404 on OAuth2 errors
18431843
"""
1844+
# https://github.com/sun-test-org/repo1/pull/215
1845+
# from cla.models.github_models import GitHub
1846+
# GitHub().process_opened_pull_request({"pull_request":{"number":215}, "repository":{"id":614349032}, "installation":{"id":35275118}})
18441847
raw_redirect = request.params.get('get_redirect_url', 'false').lower()
18451848
get_redirect_url = raw_redirect in ('1', 'true', 'yes')
18461849
return cla.controllers.repository_service.user_from_session(get_redirect_url, request, response)

utils/describe_all.sh

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#!/bin/bash
# Writes the output of ./utils/scan.sh for every table reported by ./utils/list_tables.sh
# into all-tables.secret (the file is truncated first).
# STAGE defaults to dev and REGION to us-east-1; both are exported for the helper scripts.
[ -n "$STAGE" ] || export STAGE=dev
[ -n "$REGION" ] || export REGION=us-east-1
out_file=all-tables.secret
: > "${out_file}"
# The sed/grep filters strip quotes, commas and spaces from the listing and drop empty lines.
./utils/list_tables.sh | sed 's/[", ]//g' | grep -v '^$' | while read -r table_name
do
  # scan.sh receives the table name without the "cla-${STAGE}-" prefix.
  short_name="${table_name#cla-${STAGE}-}"
  printf '%s' "Processing table ${short_name} ..."
  {
    echo "Table: ${short_name}"
    # NOTE(review): ALL=1 presumably asks scan.sh for a full dump - confirm in scan.sh.
    ALL=1 ./utils/scan.sh "${short_name}"
  } >> "${out_file}"
  echo 'done'
done

utils/describe_table.sh

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,14 @@ if [ -z "${STAGE}" ]
88
then
99
export STAGE=dev
1010
fi
11-
11+
if [ -z "$REGION" ]
12+
then
13+
REGION=us-east-1
14+
fi
1215
if [ ! -z "${DEBUG}" ]
1316
then
14-
echo "aws --profile \"lfproduct-${STAGE}\" dynamodb describe-table --table-name \"cla-${STAGE}-${1}\""
17+
echo "aws --profile \"lfproduct-${STAGE}\" --region \"${REGION}\" dynamodb describe-table --table-name \"cla-${STAGE}-${1}\""
18+
aws --profile "lfproduct-${STAGE}" --region "${REGION}" dynamodb describe-table --table-name "cla-${STAGE}-${1}"
19+
else
20+
aws --profile "lfproduct-${STAGE}" --region "${REGION}" dynamodb describe-table --table-name "cla-${STAGE}-${1}" | jq -r '.Table.AttributeDefinitions'
1521
fi
16-
aws --profile "lfproduct-${STAGE}" dynamodb describe-table --table-name "cla-${STAGE}-${1}"

utils/list_tables.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/bin/bash
# Lists the DynamoDB tables of the lfproduct-${STAGE} profile whose listing line contains "cla-${STAGE}-".
# STAGE defaults to dev and REGION to us-east-1 when unset or empty.
: "${STAGE:=dev}"
: "${REGION:=us-east-1}"
aws --profile "lfproduct-${STAGE}" --region "${REGION}" dynamodb list-tables | grep "cla-${STAGE}-"

utils/skip_cla_entry.sh

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/bin/bash
# Manages the skip_cla map attribute of an organization in the cla-${STAGE}-github-orgs table.
# Every mode is implemented with 'dynamodb update-item' and only changes the skip_cla attribute.
#
# MODE=put-item ./utils/skip_cla_entry.sh sun-test-org '*' 'copilot-swe-agent[bot]' '*'
# put-item     Sets skip_cla to a map holding only the given key/value (replaces an existing skip_cla map)
# add-key      Adds or updates a key/value inside the skip_cla map (preserves other keys)
#              NOTE(review): DynamoDB is expected to reject this when skip_cla does not exist yet - use put-item first
# delete-key   Removes a key from the skip_cla map
# delete-item  Removes the whole skip_cla attribute from the organization item
#
# MODE=add-key ./utils/skip_cla_entry.sh sun-test-org 'repo1' 're:vee?rendra' '*'
# ./utils/scan.sh github-orgs organization_name sun-test-org
#
# Requires: aws CLI and jq. jq builds the JSON arguments so that quotes and backslashes
# in arguments (common in regular expressions) are escaped correctly.

if [ -z "$MODE" ]
then
  echo "$0: MODE must be set, valid values are: put-item, add-key, delete-key, delete-item"
  exit 1
fi

if [ -z "$STAGE" ]; then
  STAGE=dev
fi
if [ -z "$REGION" ]; then
  REGION=us-east-1
fi

# update_item <organization_name> <extra 'aws dynamodb update-item' arguments...>
# Runs update-item against the github-orgs table for the given organization.
update_item() {
  local org_key
  org_key="$(jq -cn --arg org "${1}" '{organization_name: {S: $org}}')" || return 1
  shift
  aws --profile "lfproduct-${STAGE}" --region "${REGION}" dynamodb update-item \
    --table-name "cla-${STAGE}-github-orgs" \
    --key "${org_key}" \
    "$@"
}

case "$MODE" in
  put-item)
    if [ -z "${1}" ] || [ -z "${2}" ] || [ -z "${3}" ] || [ -z "${4}" ]; then
      echo "Usage: $0 <organization_name> <repo or *> <bot username> <email regexp>"
      exit 1
    fi
    # The stored value is '<bot username>;<email regexp>' under the repo key.
    update_item "${1}" \
      --update-expression 'SET skip_cla = :val' \
      --expression-attribute-values "$(jq -cn --arg repo "${2}" --arg val "${3};${4}" '{":val": {M: {($repo): {S: $val}}}}')"
    ;;
  add-key)
    if [ -z "${1}" ] || [ -z "${2}" ] || [ -z "${3}" ] || [ -z "${4}" ]; then
      echo "Usage: $0 <organization_name> <repo or *> <bot username> <email regexp>"
      exit 1
    fi
    # The repo key goes through an expression attribute name so that keys such as '*' or 're:...' are accepted.
    update_item "${1}" \
      --update-expression "SET skip_cla.#repo = :val" \
      --expression-attribute-names "$(jq -cn --arg repo "${2}" '{"#repo": $repo}')" \
      --expression-attribute-values "$(jq -cn --arg val "${3};${4}" '{":val": {S: $val}}')"
    ;;
  delete-key)
    if [ -z "${1}" ] || [ -z "${2}" ]; then
      echo "Usage: $0 <organization_name> <repo or *>"
      exit 1
    fi
    update_item "${1}" \
      --update-expression "REMOVE skip_cla.#repo" \
      --expression-attribute-names "$(jq -cn --arg repo "${2}" '{"#repo": $repo}')"
    ;;
  delete-item)
    if [ -z "${1}" ]; then
      echo "Usage: $0 <organization_name>"
      exit 1
    fi
    update_item "${1}" \
      --update-expression "REMOVE skip_cla"
    ;;
  *)
    echo "$0: Unknown MODE: $MODE"
    echo "Valid values are: put-item, add-key, delete-key, delete-item"
    exit 1
    ;;
esac
75+

0 commit comments

Comments
 (0)