2323import re
2424from collections .abc import Sequence
2525from datetime import datetime , timedelta
26- from functools import cached_property
2726from typing import TYPE_CHECKING , Any , Callable , cast
2827
2928from airflow .configuration import conf
3433
3534from airflow .exceptions import AirflowException
3635from airflow .providers .amazon .aws .hooks .s3 import S3Hook
36+ from airflow .providers .amazon .aws .sensors .base_aws import AwsBaseSensor
3737from airflow .providers .amazon .aws .triggers .s3 import S3KeysUnchangedTrigger , S3KeyTrigger
38- from airflow .sensors .base import BaseSensorOperator , poke_mode_only
38+ from airflow .providers .amazon .aws .utils .mixins import aws_template_fields
39+ from airflow .sensors .base import poke_mode_only
3940
4041
41- class S3KeySensor (BaseSensorOperator ):
42+ class S3KeySensor (AwsBaseSensor [ S3Hook ] ):
4243 """
4344 Waits for one or multiple keys (a file-like instance on S3) to be present in an S3 bucket.
4445
@@ -65,27 +66,25 @@ class S3KeySensor(BaseSensorOperator):
6566
6667 def check_fn(files: List, **kwargs) -> bool:
6768 return any(f.get('Size', 0) > 1048576 for f in files)
68- :param aws_conn_id: a reference to the s3 connection
69- :param verify: Whether to verify SSL certificates for S3 connection.
70- By default, SSL certificates are verified.
71- You can provide the following values:
72-
73- - ``False``: do not validate SSL certificates. SSL will still be used
74- (unless use_ssl is False), but SSL certificates will not be
75- verified.
76- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
77- You can specify this argument if you want to use a different
78- CA cert bundle than the one used by botocore.
7969 :param deferrable: Run operator in the deferrable mode
8070 :param use_regex: whether to use regex to check bucket
8171 :param metadata_keys: List of head_object attributes to gather and send to ``check_fn``.
8272 Acceptable values: Any top level attribute returned by s3.head_object. Specify * to return
8373 all available attributes.
8474 Default value: "Size".
8575 If the requested attribute is not found, the key is still included and the value is None.
76+ :param aws_conn_id: The Airflow connection used for AWS credentials.
77+ If this is ``None`` or empty then the default boto3 behaviour is used. If
78+ running Airflow in a distributed manner and aws_conn_id is None or
79+ empty, then default boto3 configuration would be used (and must be
80+ maintained on each worker node).
81+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
82+ :param verify: Whether or not to verify SSL certificates. See:
83+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
8684 """
8785
88- template_fields : Sequence [str ] = ("bucket_key" , "bucket_name" )
86+ template_fields : Sequence [str ] = aws_template_fields ("bucket_key" , "bucket_name" )
87+ aws_hook_class = S3Hook
8988
9089 def __init__ (
9190 self ,
@@ -94,7 +93,6 @@ def __init__(
9493 bucket_name : str | None = None ,
9594 wildcard_match : bool = False ,
9695 check_fn : Callable [..., bool ] | None = None ,
97- aws_conn_id : str | None = "aws_default" ,
9896 verify : str | bool | None = None ,
9997 deferrable : bool = conf .getboolean ("operators" , "default_deferrable" , fallback = False ),
10098 use_regex : bool = False ,
@@ -106,14 +104,13 @@ def __init__(
106104 self .bucket_key = bucket_key
107105 self .wildcard_match = wildcard_match
108106 self .check_fn = check_fn
109- self .aws_conn_id = aws_conn_id
110107 self .verify = verify
111108 self .deferrable = deferrable
112109 self .use_regex = use_regex
113110 self .metadata_keys = metadata_keys if metadata_keys else ["Size" ]
114111
115112 def _check_key (self , key , context : Context ):
116- bucket_name , key = S3Hook .get_s3_bucket_key (self .bucket_name , key , "bucket_name" , "bucket_key" )
113+ bucket_name , key = self . hook .get_s3_bucket_key (self .bucket_name , key , "bucket_name" , "bucket_key" )
117114 self .log .info ("Poking for key : s3://%s/%s" , bucket_name , key )
118115
119116 """
@@ -199,7 +196,9 @@ def _defer(self) -> None:
199196 bucket_key = self .bucket_key ,
200197 wildcard_match = self .wildcard_match ,
201198 aws_conn_id = self .aws_conn_id ,
199+ region_name = self .region_name ,
202200 verify = self .verify ,
201+ botocore_config = self .botocore_config ,
203202 poke_interval = self .poke_interval ,
204203 should_check_fn = bool (self .check_fn ),
205204 use_regex = self .use_regex ,
@@ -220,13 +219,9 @@ def execute_complete(self, context: Context, event: dict[str, Any]) -> None:
220219 elif event ["status" ] == "error" :
221220 raise AirflowException (event ["message" ])
222221
223- @cached_property
224- def hook (self ) -> S3Hook :
225- return S3Hook (aws_conn_id = self .aws_conn_id , verify = self .verify )
226-
227222
228223@poke_mode_only
229- class S3KeysUnchangedSensor (BaseSensorOperator ):
224+ class S3KeysUnchangedSensor (AwsBaseSensor [ S3Hook ] ):
230225 """
231226 Return True if inactivity_period has passed with no increase in the number of objects matching prefix.
232227
@@ -239,17 +234,7 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
239234
240235 :param bucket_name: Name of the S3 bucket
241236 :param prefix: The prefix being waited on. Relative path from bucket root level.
242- :param aws_conn_id: a reference to the s3 connection
243- :param verify: Whether or not to verify SSL certificates for S3 connection.
244- By default SSL certificates are verified.
245- You can provide the following values:
246-
247- - ``False``: do not validate SSL certificates. SSL will still be used
248- (unless use_ssl is False), but SSL certificates will not be
249- verified.
250- - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses.
251- You can specify this argument if you want to use a different
252- CA cert bundle than the one used by botocore.
237+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
253238 :param inactivity_period: The total seconds of inactivity to designate
254239 keys unchanged. Note, this mechanism is not real time and
255240 this operator may not return until a poke_interval after this period
@@ -261,16 +246,24 @@ class S3KeysUnchangedSensor(BaseSensorOperator):
261246 between pokes valid behavior. If true a warning message will be logged
262247 when this happens. If false an error will be raised.
263248 :param deferrable: Run sensor in the deferrable mode
249+ :param aws_conn_id: The Airflow connection used for AWS credentials.
250+ If this is ``None`` or empty then the default boto3 behaviour is used. If
251+ running Airflow in a distributed manner and aws_conn_id is None or
252+ empty, then default boto3 configuration would be used (and must be
253+ maintained on each worker node).
254+ :param region_name: AWS region_name. If not specified then the default boto3 behaviour is used.
255+ :param verify: Whether or not to verify SSL certificates. See:
256+ https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html
264257 """
265258
266- template_fields : Sequence [str ] = ("bucket_name" , "prefix" )
259+ template_fields : Sequence [str ] = aws_template_fields ("bucket_name" , "prefix" )
260+ aws_hook_class = S3Hook
267261
268262 def __init__ (
269263 self ,
270264 * ,
271265 bucket_name : str ,
272266 prefix : str ,
273- aws_conn_id : str | None = "aws_default" ,
274267 verify : bool | str | None = None ,
275268 inactivity_period : float = 60 * 60 ,
276269 min_objects : int = 1 ,
@@ -291,15 +284,9 @@ def __init__(
291284 self .inactivity_seconds = 0
292285 self .allow_delete = allow_delete
293286 self .deferrable = deferrable
294- self .aws_conn_id = aws_conn_id
295287 self .verify = verify
296288 self .last_activity_time : datetime | None = None
297289
298- @cached_property
299- def hook (self ):
300- """Returns S3Hook."""
301- return S3Hook (aws_conn_id = self .aws_conn_id , verify = self .verify )
302-
303290 def is_keys_unchanged (self , current_objects : set [str ]) -> bool :
304291 """
305292 Check for new objects after the inactivity_period and update the sensor state accordingly.
@@ -382,7 +369,9 @@ def execute(self, context: Context) -> None:
382369 inactivity_seconds = self .inactivity_seconds ,
383370 allow_delete = self .allow_delete ,
384371 aws_conn_id = self .aws_conn_id ,
372+ region_name = self .region_name ,
385373 verify = self .verify ,
374+ botocore_config = self .botocore_config ,
386375 last_activity_time = self .last_activity_time ,
387376 ),
388377 method_name = "execute_complete" ,
0 commit comments