99 from typing import Optional
1010
1111 from sentry_sdk ._types import Event , Hint
12+ from pyspark import SparkContext
1213
1314
1415class SparkIntegration (Integration ):
@@ -17,7 +18,7 @@ class SparkIntegration(Integration):
1718 @staticmethod
1819 def setup_once ():
1920 # type: () -> None
20- patch_spark_context_init ()
21+ _setup_sentry_tracing ()
2122
2223
2324def _set_app_properties ():
@@ -37,7 +38,7 @@ def _set_app_properties():
3738
3839
3940def _start_sentry_listener (sc ):
40- # type: (Any ) -> None
41+ # type: (SparkContext ) -> None
4142 """
4243 Start java gateway server to add custom `SparkListener`
4344 """
@@ -49,7 +50,51 @@ def _start_sentry_listener(sc):
4950 sc ._jsc .sc ().addSparkListener (listener )
5051
5152
52- def patch_spark_context_init ():
def _add_event_processor(sc):
    # type: (SparkContext) -> None
    """Register an event processor that annotates Sentry events with
    metadata from the given SparkContext (user, driver/executor tags,
    and the Spark UI URL). Existing values on the event are never
    overwritten — every field is filled via ``setdefault``.
    """
    scope = sentry_sdk.get_isolation_scope()

    @scope.add_event_processor
    def process_event(event, hint):
        # type: (Event, Hint) -> Optional[Event]
        with capture_internal_exceptions():
            # Bail out untouched if the integration is disabled or the
            # context has already been stopped.
            if sentry_sdk.get_client().get_integration(SparkIntegration) is None:
                return event

            if sc._active_spark_context is None:
                return event

            event.setdefault("user", {}).setdefault("id", sc.sparkUser())

            tags = event.setdefault("tags", {})
            for tag_key, tag_value in (
                ("executor.id", sc._conf.get("spark.executor.id")),
                ("spark-submit.deployMode", sc._conf.get("spark.submit.deployMode")),
                ("driver.host", sc._conf.get("spark.driver.host")),
                ("driver.port", sc._conf.get("spark.driver.port")),
                ("spark_version", sc.version),
                ("app_name", sc.appName),
                ("application_id", sc.applicationId),
                ("master", sc.master),
                ("spark_home", sc.sparkHome),
            ):
                tags.setdefault(tag_key, tag_value)

            event.setdefault("extra", {}).setdefault("web_url", sc.uiWebUrl)

        # Returned outside the guard so the event survives even if an
        # internal exception interrupted the enrichment above.
        return event
87+
88+
def _activate_integration(sc):
    # type: (SparkContext) -> None
    """Hook Sentry into an already-initialized SparkContext: attach the
    Java-side listener, publish app properties, and add the event
    processor that tags outgoing events."""
    _start_sentry_listener(sc)
    _set_app_properties()
    _add_event_processor(sc)
95+
96+
97+ def _patch_spark_context_init ():
5398 # type: () -> None
5499 from pyspark import SparkContext
55100
@@ -59,51 +104,22 @@ def patch_spark_context_init():
59104 def _sentry_patched_spark_context_init (self , * args , ** kwargs ):
60105 # type: (SparkContext, *Any, **Any) -> Optional[Any]
61106 rv = spark_context_init (self , * args , ** kwargs )
62- _start_sentry_listener (self )
63- _set_app_properties ()
64-
65- scope = sentry_sdk .get_isolation_scope ()
66-
67- @scope .add_event_processor
68- def process_event (event , hint ):
69- # type: (Event, Hint) -> Optional[Event]
70- with capture_internal_exceptions ():
71- if sentry_sdk .get_client ().get_integration (SparkIntegration ) is None :
72- return event
73-
74- if self ._active_spark_context is None :
75- return event
76-
77- event .setdefault ("user" , {}).setdefault ("id" , self .sparkUser ())
78-
79- event .setdefault ("tags" , {}).setdefault (
80- "executor.id" , self ._conf .get ("spark.executor.id" )
81- )
82- event ["tags" ].setdefault (
83- "spark-submit.deployMode" ,
84- self ._conf .get ("spark.submit.deployMode" ),
85- )
86- event ["tags" ].setdefault (
87- "driver.host" , self ._conf .get ("spark.driver.host" )
88- )
89- event ["tags" ].setdefault (
90- "driver.port" , self ._conf .get ("spark.driver.port" )
91- )
92- event ["tags" ].setdefault ("spark_version" , self .version )
93- event ["tags" ].setdefault ("app_name" , self .appName )
94- event ["tags" ].setdefault ("application_id" , self .applicationId )
95- event ["tags" ].setdefault ("master" , self .master )
96- event ["tags" ].setdefault ("spark_home" , self .sparkHome )
97-
98- event .setdefault ("extra" , {}).setdefault ("web_url" , self .uiWebUrl )
99-
100- return event
101-
107+ _activate_integration (self )
102108 return rv
103109
104110 SparkContext ._do_init = _sentry_patched_spark_context_init
105111
106112
def _setup_sentry_tracing():
    # type: () -> None
    """Activate the integration immediately when a SparkContext already
    exists; otherwise patch ``SparkContext._do_init`` so activation
    happens as soon as one is created."""
    from pyspark import SparkContext

    if SparkContext._active_spark_context is None:
        _patch_spark_context_init()
    else:
        _activate_integration(SparkContext._active_spark_context)
122+
107123class SparkListener :
108124 def onApplicationEnd (self , applicationEnd ): # noqa: N802,N803
109125 # type: (Any) -> None
@@ -208,10 +224,21 @@ class Java:
208224
209225
210226class SentryListener (SparkListener ):
227+ def _add_breadcrumb (
228+ self ,
229+ level , # type: str
230+ message , # type: str
231+ data = None , # type: Optional[dict[str, Any]]
232+ ):
233+ # type: (...) -> None
234+ sentry_sdk .get_global_scope ().add_breadcrumb (
235+ level = level , message = message , data = data
236+ )
237+
211238 def onJobStart (self , jobStart ): # noqa: N802,N803
212239 # type: (Any) -> None
213240 message = "Job {} Started" .format (jobStart .jobId ())
214- sentry_sdk . add_breadcrumb (level = "info" , message = message )
241+ self . _add_breadcrumb (level = "info" , message = message )
215242 _set_app_properties ()
216243
217244 def onJobEnd (self , jobEnd ): # noqa: N802,N803
@@ -227,14 +254,14 @@ def onJobEnd(self, jobEnd): # noqa: N802,N803
227254 level = "warning"
228255 message = "Job {} Failed" .format (jobEnd .jobId ())
229256
230- sentry_sdk . add_breadcrumb (level = level , message = message , data = data )
257+ self . _add_breadcrumb (level = level , message = message , data = data )
231258
232259 def onStageSubmitted (self , stageSubmitted ): # noqa: N802,N803
233260 # type: (Any) -> None
234261 stage_info = stageSubmitted .stageInfo ()
235262 message = "Stage {} Submitted" .format (stage_info .stageId ())
236263 data = {"attemptId" : stage_info .attemptId (), "name" : stage_info .name ()}
237- sentry_sdk . add_breadcrumb (level = "info" , message = message , data = data )
264+ self . _add_breadcrumb (level = "info" , message = message , data = data )
238265 _set_app_properties ()
239266
240267 def onStageCompleted (self , stageCompleted ): # noqa: N802,N803
@@ -255,4 +282,4 @@ def onStageCompleted(self, stageCompleted): # noqa: N802,N803
255282 message = "Stage {} Completed" .format (stage_info .stageId ())
256283 level = "info"
257284
258- sentry_sdk . add_breadcrumb (level = level , message = message , data = data )
285+ self . _add_breadcrumb (level = level , message = message , data = data )
0 commit comments