77import concurrent .futures
88import json
99import os
10+ import re
1011import base64
1112import binascii
1213import threading
1314import time
1415import uuid
15- from typing import List , Optional , Union
16+ from typing import List , Optional , Union , Tuple
1617
1718import cla
1819import falcon
1920import github
2021from cla .controllers .github_application import GitHubInstallation
2122from cla .models import DoesNotExist , repository_service_interface
22- from cla .models .dynamo_models import GitHubOrg , Repository
23+ from cla .models .dynamo_models import GitHubOrg , Repository , Event
24+ from cla .models .event_types import EventType
2325from cla .user import UserCommitSummary
2426from cla .utils import (append_project_version_to_url , get_project_instance ,
2527 set_active_pr_metadata )
@@ -736,6 +738,11 @@ def update_merge_group(self, installation_id, github_repository_id, merge_group_
736738 for user_commit_summary in commit_authors :
737739 handle_commit_from_user (project , user_commit_summary , signed , missing )
738740
741+ # Skip whitelisted bots per org/repo GitHub login/email regexps
742+ # repo can be defined as '*' (all repos) or re:<regexp> (regexp to match repo name) or 'repo-name' for exact match
743+ # the same for value which is GitHub login match then ; separator and then email match (same matching via * or re:<regexp> or exact-match)
744+ missing , signed = self .skip_whitelisted_bots (github_org , repository .get_repository_name (), missing )
745+
739746 # update Merge group status
740747 self .update_merge_group_status (
741748 installation_id , github_repository_id , pull_request , merge_group_sha , signed , missing , project .get_version ()
@@ -896,6 +903,10 @@ def update_change_request(self, installation_id, github_repository_id, change_re
896903 for future in concurrent .futures .as_completed (futures ):
897904 cla .log .debug (f"{ fn } - ThreadClosed for handle_commit_from_user" )
898905
906+ # Skip whitelisted bots per org/repo GitHub login/email regexps
907+ # repo can be defined as '*' (all repos) or re:<regexp> (regexp to match repo name) or 'repo-name' for exact match
908+ # the same for value which is GitHub login match then ; separator and then email match (same matching via * or re:<regexp> or exact-match)
909+ missing , signed = self .skip_whitelisted_bots (github_org , repository .get_repository_name (), missing )
899910 # At this point, the signed and missing lists are now filled and updated with the commit user info
900911
901912 cla .log .debug (
@@ -915,6 +926,155 @@ def update_change_request(self, installation_id, github_repository_id, change_re
915926 project_version = project .get_version (),
916927 )
917928
def property_matches(self, pattern, value):
    """
    Decide whether ``value`` satisfies ``pattern``.

    Supported pattern forms:
    - ``'*'``: wildcard, accepts any value (including ``None``)
    - ``'re:<regex>'``: the regex is searched for inside value; a ``None`` value never matches
    - anything else: value must be equal to the pattern

    Any exception raised while matching is logged as a warning and treated as "no match".
    """
    try:
        if pattern == '*':
            return True
        if not pattern.startswith('re:'):
            # Plain pattern: exact comparison only.
            return value == pattern
        if value is None:
            return False
        expression = pattern[3:]
        return re.search(expression, value) is not None
    except Exception as exc:
        cla.log.warning("Error in property_matches: pattern=%s, value=%s, exc=%s", pattern, value, exc)
        return False
946+
def is_actor_skipped(self, actor, config):
    """
    Returns True if the actor should be skipped (whitelisted) based on config pattern.

    :param actor: object whose ``author_login`` and ``author_email`` attributes are matched;
        a missing attribute is treated as None.
    :param config: '<username_pattern>;<email_pattern>' - the two patterns are separated by a
        semicolon. Only the first ';' splits, so the email pattern may itself contain ';'.
    :return: True only when both the username pattern and the email pattern match (as decided
        by property_matches). False when config contains no ';' or when any error occurs
        (the error is logged as a warning).
    """
    try:
        if ';' not in config:
            # Without the separator there is no email pattern, so nothing can match.
            return False
        username_pattern, email_pattern = config.split(';', 1)
        username = getattr(actor, "author_login", None)
        email = getattr(actor, "author_email", None)
        return self.property_matches(username_pattern, username) and self.property_matches(email_pattern, email)
    except Exception as exc:
        cla.log.warning("Error in is_actor_skipped: config=%s, actor=%s, exc=%s", config, actor, exc)
        return False
962+
def strip_org(self, repo_full):
    """
    Drop the leading 'org/' part from a repository name.

    Everything after the first '/' is returned; a name without '/' is returned unchanged.
    """
    if '/' not in repo_full:
        return repo_full
    _org, repo_name = repo_full.split('/', 1)
    return repo_name
967+
def skip_whitelisted_bots(self, org_model, org_repo, actors_missing_cla) -> Tuple[List[UserCommitSummary], List[UserCommitSummary]]:
    """
    Check if the actors are whitelisted based on the skip_cla configuration.
    Returns a tuple of two lists:
    - actors_missing_cla: actors who still need to sign the CLA after checking skip_cla
    - whitelisted_actors: actors who are skipped due to skip_cla configuration
    :param org_model: The GitHub organization model instance (must provide get_skip_cla()).
    :param org_repo: The repository name, either 'org/repo' or just 'repo'; any 'org/' prefix is
        stripped before the skip_cla lookup.
    :param actors_missing_cla: List of UserCommitSummary objects representing actors who are missing CLA.
    :return: Tuple of (actors_missing_cla, whitelisted_actors)
    : in cla-{stage}-github-orgs table there can be a skip_cla field which is a dict with the following structure:
    {
        "repo-name": "<username_pattern>;<email_pattern>",
        "re:repo-regexp": "<username_pattern>;<email_pattern>",
        "*": "<username_pattern>;<email_pattern>"
    }
    where:
    - repo-name is the exact repository name without the org prefix (e.g., "my-repo")
    - re:repo-regexp is a regex pattern searched for in the (org-stripped) repository name
    - * is a wildcard that applies to all repositories
    - <username_pattern> is a GitHub username pattern (exact match or regex prefixed by re: or match all '*')
    - <email_pattern> is a GitHub email pattern (exact match or regex prefixed by re: or match all '*')
    :note: The username and email patterns are separated by a semicolon (;).
    :note: Lookup precedence is: exact key, then the first matching 're:' key (dict iteration order),
        then the '*' key.
    :note: If the skip_cla is not set, it will skip the whitelisted bots check.
    :note: Each whitelisted actor gets authorized=True and a BypassCLA event is recorded for it.
    :note: On any unexpected error the original actors_missing_cla list is returned with no
        whitelisted actors (skip_cla logic is disabled for this run).
    NOTE(review): the second list contains only the newly whitelisted actors; callers that keep a
        list of signers probably want to extend it with this list rather than replace it - confirm.
    """
    try:
        repo = self.strip_org(org_repo)
        skip_cla = org_model.get_skip_cla()
        if skip_cla is None:
            cla.log.debug("skip_cla is not set, skipping whitelisted bots check")
            return actors_missing_cla, []

        if hasattr(skip_cla, "as_dict"):
            skip_cla = skip_cla.as_dict()
        config = ''
        # 1. Exact match
        if repo in skip_cla:
            cla.log.debug("skip_cla config found for repo %s: %s (exact hit)", repo, skip_cla[repo])
            config = skip_cla[repo]

        # 2. Regex pattern (if no exact hit)
        if config == '':
            cla.log.debug("No skip_cla config found for repo %s, checking regex patterns", repo)
            for k, v in skip_cla.items():
                if not isinstance(k, str) or not k.startswith("re:"):
                    continue
                pattern = k[3:]
                try:
                    if re.search(pattern, repo):
                        config = v
                        cla.log.debug("Found skip_cla config for repo %s: %s via regex pattern: %s", repo, config, pattern)
                        break
                except re.error as e:
                    # A broken regex key must not disable the remaining keys.
                    cla.log.warning("Invalid regex in skip_cla: %s (%s)", k, e)
                    continue

        # 3. Wildcard fallback
        if config == '' and '*' in skip_cla:
            cla.log.debug("No skip_cla config found for repo %s, using wildcard config", repo)
            config = skip_cla['*']

        # 4. No match
        if config == '':
            cla.log.debug("No skip_cla config found for repo %s, skipping whitelisted bots check", repo)
            return actors_missing_cla, []

        out_actors_missing_cla = []
        whitelisted_actors = []
        for actor in actors_missing_cla:
            try:
                if self.is_actor_skipped(actor, config):
                    actor_data = "id='{}',login='{}',username='{}',email='{}'".format(
                        getattr(actor, "author_id", "(null)"),
                        getattr(actor, "author_login", "(null)"),
                        getattr(actor, "author_username", "(null)"),
                        getattr(actor, "author_email", "(null)"),
                    )
                    msg = "Skipping CLA check for repo='{}', actor: {} due to skip_cla config: '{}'".format(
                        org_repo,
                        actor_data,
                        config,
                    )
                    cla.log.info(msg)
                    # Record the bypass before whitelisting: if the event cannot be created the
                    # actor falls through to the except below and stays in the missing list.
                    Event.create_event(
                        event_type=EventType.BypassCLA,
                        event_data=msg,
                        event_summary=msg,
                        event_user_name=actor_data,
                        contains_pii=True,
                    )
                    actor.authorized = True
                    whitelisted_actors.append(actor)
                    continue
            except Exception as e:
                cla.log.warning(
                    "Error checking skip_cla for actor '%s' (login='%s', email='%s'): %s",
                    actor, getattr(actor, "author_login", None), getattr(actor, "author_email", None), e,
                )
            out_actors_missing_cla.append(actor)

        return out_actors_missing_cla, whitelisted_actors
    except Exception as exc:
        # Log only names that are guaranteed to be bound here (the parameters): the stripped
        # repo name may not exist yet if the failure happened on the first statements.
        cla.log.error(
            "Exception in skip_whitelisted_bots: %s (repo=%s, actors=%s). Disabling skip_cla logic for this run.",
            exc, org_repo, actors_missing_cla,
        )
        # Always return all actors if something breaks
        return actors_missing_cla, []
1076+
1077+
9181078 def get_pull_request (self , github_repository_id , pull_request_number , installation_id ):
9191079 """
9201080 Helper method to get the pull request object from GitHub.
0 commit comments