Commit 890baf7

inject OL to SparkConf
Signed-off-by: Maciej Obuchowski <[email protected]>
1 parent ca6895a commit 890baf7

File tree

6 files changed, +167 -53 lines changed

build.gradle

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ plugins {
   id 'pl.allegro.tech.build.axion-release' version '1.14.4'
   id 'io.github.gradle-nexus.publish-plugin' version '1.3.0'

-  id "com.github.johnrengelman.shadow" version "7.1.2" apply false
+  id "com.github.johnrengelman.shadow" version "8.1.1" apply false
   id "me.champeau.jmh" version "0.7.0" apply false
   id 'org.gradle.playframework' version '0.13' apply false
   id 'info.solidsoft.pitest' version '1.9.11' apply false

dd-java-agent/instrumentation/spark/build.gradle

Lines changed: 2 additions & 0 deletions

@@ -9,6 +9,7 @@ configurations.all {
 dependencies {
   compileOnly group: 'org.apache.spark', name: 'spark-core_2.12', version: '2.4.0'
   compileOnly group: 'org.apache.spark', name: 'spark-sql_2.12', version: '2.4.0'
+  compileOnly group: 'io.openlineage', name: "openlineage-spark_2.12", version: '1.28.0-SNAPSHOT'

   testFixturesImplementation group: 'com.datadoghq', name: 'sketches-java', version: '0.8.2'
   testFixturesImplementation group: 'com.google.protobuf', name: 'protobuf-java', version: '3.14.0'
@@ -20,4 +21,5 @@ dependencies {
   testFixturesCompileOnly group: 'org.apache.spark', name: 'spark-core_2.12', version: '2.4.0'
   testFixturesCompileOnly group: 'org.apache.spark', name: 'spark-sql_2.12', version: '2.4.0'
   testFixturesCompileOnly group: 'org.apache.spark', name: 'spark-yarn_2.12', version: '2.4.0'
+  testFixturesCompileOnly group: 'io.openlineage', name: "openlineage-spark_2.12", version: '1.28.0-SNAPSHOT'
 }

dd-java-agent/instrumentation/spark/spark_2.12/build.gradle

Lines changed: 11 additions & 0 deletions

@@ -1,5 +1,6 @@
 plugins {
   id 'java-test-fixtures'
+  id 'com.github.johnrengelman.shadow'
 }

 def sparkVersion = '2.4.0'
@@ -35,15 +36,18 @@ dependencies {

   compileOnly group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "$sparkVersion"
   compileOnly group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "$sparkVersion"
+  compileOnly group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   testImplementation(testFixtures(project(":dd-java-agent:instrumentation:spark")))
   testImplementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "$sparkVersion"
   testImplementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "$sparkVersion"
   testImplementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: "$sparkVersion"
+  testImplementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   test_spark24Implementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "2.4.8"
   test_spark24Implementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "2.4.8"
   test_spark24Implementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: "2.4.8"
+  test_spark24Implementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   test_spark32Implementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "3.2.4"
   test_spark32Implementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "3.2.4"
@@ -52,13 +56,20 @@ dependencies {
   // PooledByteBufAllocator constructor. See this PR where the new constructor (the only one we support) was introduced:
   // https://github.com/netty/netty/pull/10267
   test_spark32Implementation group: 'io.netty', name: 'netty-buffer', version: '4.1.52.Final'
+  test_spark32Implementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   latestDepTestImplementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: '+'
   latestDepTestImplementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: '+'
   latestDepTestImplementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: '+'
+  latestDepTestImplementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'
+
 }

 tasks.named("test").configure {
   dependsOn "test_spark24"
   dependsOn "test_spark32"
 }
+
+shadowJar {
+  relocate "io.openlineage.spark", "shared.io.openlineage.spark_${scalaVersion}"
+}
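
Note: the relocate rule above shades the bundled OpenLineage classes under a "shared." prefix, so the agent's copy cannot clash with a user-supplied openlineage-spark jar on the application classpath. A minimal sketch, assuming a hypothetical local path to the built shadow jar, that lists the relocated entries to confirm the rule took effect:

import java.util.Enumeration;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;

// Prints every relocated OpenLineage entry in the built jar; empty output would mean
// the shadowJar relocate rule did not apply. The jar path is a hypothetical build
// output location, not something this commit defines.
public class CheckRelocation {
  public static void main(String[] args) throws Exception {
    try (JarFile jar = new JarFile("build/libs/spark_2.12.jar")) {
      Enumeration<JarEntry> entries = jar.entries();
      while (entries.hasMoreElements()) {
        String name = entries.nextElement().getName();
        if (name.startsWith("shared/io/openlineage/spark_2.12/")) {
          System.out.println(name);
        }
      }
    }
  }
}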

dd-java-agent/instrumentation/spark/spark_2.13/build.gradle

Lines changed: 9 additions & 0 deletions

@@ -1,5 +1,6 @@
 plugins {
   id 'java-test-fixtures'
+  id 'com.github.johnrengelman.shadow'
 }

 // Support for 2.13 added in 3.2.0 https://issues.apache.org/jira/browse/SPARK-25075
@@ -36,22 +37,30 @@ dependencies {

   compileOnly group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "$sparkVersion"
   compileOnly group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "$sparkVersion"
+  compileOnly group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   testImplementation(testFixtures(project(":dd-java-agent:instrumentation:spark")))
   testImplementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "$sparkVersion"
   testImplementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "$sparkVersion"
   testImplementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: "$sparkVersion"
+  testImplementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   test_spark32Implementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: "3.2.4"
   test_spark32Implementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: "3.2.4"
   test_spark32Implementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: "3.2.4"
+  test_spark32Implementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'

   // FIXME: Currently not working on Spark 4.0.0 preview releases.
   latestDepTestImplementation group: 'org.apache.spark', name: "spark-core_$scalaVersion", version: '3.+'
   latestDepTestImplementation group: 'org.apache.spark', name: "spark-sql_$scalaVersion", version: '3.+'
   latestDepTestImplementation group: 'org.apache.spark', name: "spark-yarn_$scalaVersion", version: '3.+'
+  latestDepTestImplementation group: 'io.openlineage', name: "openlineage-spark_$scalaVersion", version: '1.28.0-SNAPSHOT'
 }

 tasks.named("test").configure {
   dependsOn "test_spark32"
 }
+
+//shadowJar {
+//  relocate "io.openlineage.spark", "shared.io.openlineage.spark_${scalaVersion}"
+//}

dd-java-agent/instrumentation/spark/src/main/java/datadog/trace/instrumentation/spark/AbstractDatadogSparkListener.java

Lines changed: 134 additions & 52 deletions

@@ -25,17 +25,8 @@
 import java.lang.invoke.MethodType;
 import java.time.OffsetDateTime;
 import java.time.format.DateTimeParseException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Properties;
-import java.util.UUID;
+import java.util.*;
+import java.util.stream.Collectors;
 import org.apache.spark.ExceptionFailure;
 import org.apache.spark.SparkConf;
 import org.apache.spark.TaskFailedReason;
@@ -56,6 +47,8 @@
 import scala.Tuple2;
 import scala.collection.JavaConverters;

+// import io.openlineage.spark.agent.OpenLineageSparkListener;
+
 /**
  * Implementation of the SparkListener {@link SparkListener} to generate spans from the execution of
  * a spark application.
@@ -65,7 +58,7 @@
  * still needed
  */
 public abstract class AbstractDatadogSparkListener extends SparkListener {
-  private static final Logger log = LoggerFactory.getLogger(AbstractDatadogSparkListener.class);
+  protected static final Logger log = LoggerFactory.getLogger(AbstractDatadogSparkListener.class);
   private static final ObjectMapper objectMapper = new ObjectMapper();
   public static volatile AbstractDatadogSparkListener listener = null;
   public static volatile boolean finishTraceOnApplicationEnd = true;
@@ -123,8 +116,10 @@ public abstract class AbstractDatadogSparkListener extends SparkListener {
   private long availableExecutorTime = 0;

   private volatile boolean applicationEnded = false;
+  private SparkListener openLineageSparkListener = null;

   public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sparkVersion) {
+    log.error("STARTING DD SPARK LISTENER");
     tracer = AgentTracer.get();

     this.sparkConf = sparkConf;
@@ -151,8 +146,72 @@ public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sp
               finishApplication(System.currentTimeMillis(), null, 0, null);
             }
           }));
+    log.error("Created datadog spark listener: {}", this.getClass().getSimpleName());
+    loadOlSparkListener();
+  }

-    log.info("Created datadog spark listener: {}", this.getClass().getSimpleName());
+  void loadOlSparkListener() {
+    List<ClassLoader> availableClassloaders =
+        Thread.getAllStackTraces().keySet().stream()
+            .map(Thread::getContextClassLoader)
+            .filter(Objects::nonNull)
+            .collect(Collectors.toList());
+    String className = "io.openlineage.spark.agent.OpenLineageSparkListener";
+    Class clazz = null;
+    try {
+      clazz = Class.forName(className);
+    } catch (Exception e) {
+      log.error("Failed to load OL Spark Listener via Class.forName: {}", e.toString());
+      for (ClassLoader classLoader : availableClassloaders) {
+        try {
+          clazz = classLoader.loadClass(className);
+          log.error("Loaded Spark Listener via classLoader: {}", classLoader);
+          break;
+        } catch (Exception ex) {
+          log.error(
+              "Failed to load OL Spark Listener via loadClass via ClassLoader {} - {}",
+              classLoader,
+              ex.toString());
+        }
+        try {
+          clazz = classLoader.getParent().loadClass(className);
+          log.error(
+              "Loaded Spark Listener via parent classLoader: {} for CL {}",
+              classLoader.getParent(),
+              classLoader);
+          break;
+        } catch (Exception ex) {
+          log.error(
+              "Failed to load OL Spark Listener via loadClass via parent ClassLoader {} - {}",
+              classLoader.getParent(),
+              ex.toString());
+        }
+      }
+    }
+    if (clazz == null) {
+      try {
+        clazz = ClassLoader.getSystemClassLoader().loadClass(className);
+        log.error(
+            "Loaded Spark Listener via system classLoader: {}", ClassLoader.getSystemClassLoader());
+      } catch (Exception ex) {
+        log.error(
+            "Failed to load OL Spark Listener via loadClass via SystemClassLoader {}",
+            ex.toString());
+      }
+    }
+    if (clazz == null) {
+      return;
+    }
+    try {
+      sparkConf.set("spark.openlineage.transport", "true");
+      openLineageSparkListener =
+          (SparkListener) clazz.getDeclaredConstructor(SparkConf.class).newInstance(sparkConf);
+      log.error(
+          "Created OL spark listener: {}", openLineageSparkListener.getClass().getSimpleName());
+    } catch (Exception e) {
+      log.error("Failed to instantiate OL Spark Listener: {}", e.toString());
+    }
   }

   /** Resource name of the spark job. Provide an implementation based on a specific scala version */
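
Note: the lookup above cannot know which classloader resolved the injected --packages jar, so it probes Class.forName, each live thread's context classloader and its parent, and finally the system classloader; on success it sets "spark.openlineage.transport" on the SparkConf and instantiates the listener reflectively through its SparkConf constructor. A condensed sketch of the same lookup strategy with the logging stripped (the class and method names below are ours, not part of the commit):

import java.util.LinkedHashSet;
import java.util.Set;

// Condensed version of the lookup above: try Class.forName, then each live thread's
// context classloader and its parent, then the system classloader. Returns null when
// the class is not visible anywhere, which the caller treats as "feature unavailable".
public final class ClassFinder {
  public static Class<?> findAnywhere(String className) {
    try {
      return Class.forName(className);
    } catch (ClassNotFoundException ignored) {
      // fall through to the classloader scan
    }
    Set<ClassLoader> loaders = new LinkedHashSet<>();
    for (Thread thread : Thread.getAllStackTraces().keySet()) {
      ClassLoader contextLoader = thread.getContextClassLoader();
      if (contextLoader != null) {
        loaders.add(contextLoader);
        if (contextLoader.getParent() != null) {
          loaders.add(contextLoader.getParent());
        }
      }
    }
    loaders.add(ClassLoader.getSystemClassLoader());
    for (ClassLoader loader : loaders) {
      try {
        return loader.loadClass(className);
      } catch (ClassNotFoundException ignored) {
        // try the next candidate
      }
    }
    return null;
  }
}

Unlike the hunk, this sketch deduplicates the candidate loaders before probing them, so a loader shared by many threads is only tried once.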
@@ -176,6 +235,9 @@ public AbstractDatadogSparkListener(SparkConf sparkConf, String appId, String sp
   @Override
   public synchronized void onApplicationStart(SparkListenerApplicationStart applicationStart) {
     this.applicationStart = applicationStart;
+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onApplicationStart(applicationStart);
+    }
   }

   private void initApplicationSpanIfNotInitialized() {
@@ -237,51 +299,57 @@ public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) {
     if (finishTraceOnApplicationEnd) {
       finishApplication(applicationEnd.time(), null, 0, null);
    }
+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onApplicationEnd(applicationEnd);
+    }
   }

   public synchronized void finishApplication(
       long time, Throwable throwable, int exitCode, String msg) {
     log.info("Finishing spark application trace");
+    return;

-    if (applicationEnded) {
-      return;
-    }
-    applicationEnded = true;
-
-    if (applicationSpan == null && jobCount > 0) {
-      // If the application span is not initialized, but spark jobs have been executed, all those
-      // spark jobs were databricks or streaming. In this case we don't send the application span
-      return;
-    }
-    initApplicationSpanIfNotInitialized();
-
-    if (throwable != null) {
-      applicationSpan.addThrowable(throwable);
-    } else if (exitCode != 0) {
-      applicationSpan.setError(true);
-      applicationSpan.setTag(
-          DDTags.ERROR_TYPE, "Spark Application Failed with exit code " + exitCode);
-
-      String errorMessage = getErrorMessageWithoutStackTrace(msg);
-      applicationSpan.setTag(DDTags.ERROR_MSG, errorMessage);
-      applicationSpan.setTag(DDTags.ERROR_STACK, msg);
-    } else if (lastJobFailed) {
-      applicationSpan.setError(true);
-      applicationSpan.setTag(DDTags.ERROR_TYPE, "Spark Application Failed");
-      applicationSpan.setTag(DDTags.ERROR_MSG, lastJobFailedMessage);
-      applicationSpan.setTag(DDTags.ERROR_STACK, lastJobFailedStackTrace);
-    }
-
-    applicationMetrics.setSpanMetrics(applicationSpan);
-    applicationSpan.setMetric("spark.max_executor_count", maxExecutorCount);
-    applicationSpan.setMetric(
-        "spark.available_executor_time", computeCurrentAvailableExecutorTime(time));
-
-    applicationSpan.finish(time * 1000);
-
-    // write traces synchronously:
-    // as soon as the application finishes, the JVM starts to shut down
-    tracer.flush();
+    // if (applicationEnded) {
+    //   return;
+    // }
+    // applicationEnded = true;
+    //
+    // if (applicationSpan == null && jobCount > 0) {
+    //   // If the application span is not initialized, but spark jobs have been executed, all
+    //   // those spark jobs were databricks or streaming. In this case we don't send the
+    //   // application span
+    //   return;
+    // }
+    // initApplicationSpanIfNotInitialized();
+    //
+    // if (throwable != null) {
+    //   applicationSpan.addThrowable(throwable);
+    // } else if (exitCode != 0) {
+    //   applicationSpan.setError(true);
+    //   applicationSpan.setTag(
+    //       DDTags.ERROR_TYPE, "Spark Application Failed with exit code " + exitCode);
+    //
+    //   String errorMessage = getErrorMessageWithoutStackTrace(msg);
+    //   applicationSpan.setTag(DDTags.ERROR_MSG, errorMessage);
+    //   applicationSpan.setTag(DDTags.ERROR_STACK, msg);
+    // } else if (lastJobFailed) {
+    //   applicationSpan.setError(true);
+    //   applicationSpan.setTag(DDTags.ERROR_TYPE, "Spark Application Failed");
+    //   applicationSpan.setTag(DDTags.ERROR_MSG, lastJobFailedMessage);
+    //   applicationSpan.setTag(DDTags.ERROR_STACK, lastJobFailedStackTrace);
+    // }
+    //
+    // applicationMetrics.setSpanMetrics(applicationSpan);
+    // applicationSpan.setMetric("spark.max_executor_count", maxExecutorCount);
+    // applicationSpan.setMetric(
+    //     "spark.available_executor_time", computeCurrentAvailableExecutorTime(time));
+    //
+    // applicationSpan.finish(time * 1000);
+    //
+    // // write traces synchronously:
+    // // as soon as the application finishes, the JVM starts to shut down
+    // tracer.flush();
   }

   private AgentSpan getOrCreateStreamingBatchSpan(
@@ -426,6 +494,9 @@ public synchronized void onJobStart(SparkListenerJobStart jobStart) {
       stageToJob.put(stageId, jobStart.jobId());
     }
     jobSpans.put(jobStart.jobId(), jobSpan);
+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onJobStart(jobStart);
+    }
   }

   @Override
@@ -458,6 +529,9 @@ public synchronized void onJobEnd(SparkListenerJobEnd jobEnd) {
     }

     jobSpan.finish(jobEnd.time() * 1000);
+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onJobEnd(jobEnd);
+    }
   }

   @Override
@@ -624,6 +698,10 @@ public void onTaskEnd(SparkListenerTaskEnd taskEnd) {

     Properties props = stageProperties.get(stageSpanKey);
     sendTaskSpan(stageSpan, taskEnd, props);
+
+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onTaskEnd(taskEnd);
+    }
   }

   private void sendTaskSpan(
@@ -702,6 +780,10 @@ public void onOtherEvent(SparkListenerEvent event) {
       onSQLExecutionEnd((SparkListenerSQLExecutionEnd) event);
     }

+    if (this.openLineageSparkListener != null) {
+      this.openLineageSparkListener.onOtherEvent(event);
+    }
+
     updateAdaptiveSQLPlan(event);
   }
dd-java-agent/instrumentation/spark/src/main/java/datadog/trace/instrumentation/spark/AbstractSparkInstrumentation.java

Lines changed: 10 additions & 0 deletions

@@ -71,6 +71,16 @@ public static void enter(@Advice.Argument(0) SparkSubmitArguments submitArgs) {
       // prepareSubmitEnvironment might be called before/after runMain depending on spark version
       AbstractDatadogSparkListener.finishTraceOnApplicationEnd = true;
     }
+
+    if (!submitArgs.packages().contains("io.openlineage:openlineage-spark")) {
+      if (submitArgs.packages().isEmpty()) {
+        submitArgs.handle("--packages", "io.openlineage:openlineage-spark_2.12:1.28.0");
+      } else {
+        submitArgs.handle(
+            "--packages",
+            submitArgs.packages() + ",io.openlineage:openlineage-spark_2.12:1.28.0");
+      }
+    }
   }
 }
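
Note: --packages takes a comma-separated list of Maven coordinates, so the advice above only needs string checks and concatenation to inject the OpenLineage artifact when the user did not request it. The guard logic in isolation (the class and helper below are ours; unlike the diff, the sketch also tolerates a null packages value as a defensive default):

final class OpenLineagePackages {
  // Returns the --packages value with the OpenLineage coordinate appended when absent.
  // Hypothetical helper; the advice applies the same checks directly to SparkSubmitArguments.
  static String withOpenLineage(String packages) {
    String coordinate = "io.openlineage:openlineage-spark_2.12:1.28.0";
    if (packages == null || packages.isEmpty()) {
      return coordinate; // nothing requested yet, inject only the OpenLineage artifact
    }
    if (packages.contains("io.openlineage:openlineage-spark")) {
      return packages; // respect a user-pinned version
    }
    return packages + "," + coordinate; // append to the existing comma-separated list
  }
}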
