Merged

Changes from 10 commits (26 commits total):
be7ada4  Extract Spark Plan product with keys for Scala 2.13 (charlesmyu, Oct 10, 2025)
9c19e11  Extract Spark Plan product values for Spark 2.12 (charlesmyu, Oct 10, 2025)
3f9c26b  Update tests for meta field in Spark SQL plans (charlesmyu, Oct 14, 2025)
94f3139  Remove unused logic to parse children, enrich product parsing to supp… (charlesmyu, Oct 15, 2025)
beb4d5f  Update tests to assert on meta values (charlesmyu, Oct 16, 2025)
750c68f  Use Abstract class for common functions (charlesmyu, Oct 16, 2025)
0279fff  Use Jackson JSON parser instead of rolling own parsing (charlesmyu, Oct 16, 2025)
50fa41a  Refactor AbstractSparkPlanUtils to only require key generation on impl (charlesmyu, Oct 17, 2025)
51835fa  Default to returning null if class not recognized, limit recursion de… (charlesmyu, Oct 20, 2025)
c68b356  Improve testing scheme for Spark32 on Scala 212 with unknown keys (charlesmyu, Oct 20, 2025)
8bf8488  Improve method & class naming, reuse ObjectMapper from listener (charlesmyu, Oct 21, 2025)
55b917b  Gate Spark Plan parsing with flag (charlesmyu, Oct 21, 2025)
047880a  Match classes by string comparison, add negative cache (charlesmyu, Oct 21, 2025)
3619b77  Add unit tests for AbstractSparkPlanSerializer (charlesmyu, Oct 23, 2025)
53918a3  Make ObjectMapper protected on AbstractDatadogSparkListener instead o… (charlesmyu, Oct 23, 2025)
6e68c69  Specify correct helper class names (charlesmyu, Oct 23, 2025)
5483ba8  Add dd.data.jobs.experimental_features.enabled FF (charlesmyu, Oct 23, 2025)
e4973fc  Remove knownMatchingTypes override from version-specific impls (charlesmyu, Oct 24, 2025)
5527ad0  Catch NullPointerException for getDeclaredMethod calls (charlesmyu, Oct 24, 2025)
1f31add  Adjust more gates to match classes using string comparison (charlesmyu, Oct 24, 2025)
18e51d5  Revert "Catch NullPointerException for getDeclaredMethod calls" (charlesmyu, Oct 24, 2025)
de336b9  Explicit cast to String on simpleString calls (charlesmyu, Oct 27, 2025)
160558e  Use toMap to convert mutable to immutable map in Scala 2.12 (charlesmyu, Oct 27, 2025)
d4c8264  Improvements from comments: use singleton, string concat instead of f… (charlesmyu, Oct 27, 2025)
ef18062  Avoid merge conflict, reorder flags (charlesmyu, Oct 27, 2025)
34528dc  Exit loop early, store reflected methods as class fields (charlesmyu, Oct 28, 2025)
Spark212Instrumentation.java
@@ -8,15 +8,20 @@
import datadog.trace.api.Config;
import net.bytebuddy.asm.Advice;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.execution.SparkPlan;
import org.apache.spark.sql.execution.SparkPlanInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.JavaConverters;
import scala.collection.immutable.HashMap;

@AutoService(InstrumenterModule.class)
public class Spark212Instrumentation extends AbstractSparkInstrumentation {
@Override
public String[] helperClassNames() {
return new String[] {
packageName + ".AbstractDatadogSparkListener",
packageName + ".AbstractSparkPlanUtils",
packageName + ".DatabricksParentContext",
packageName + ".OpenlineageParentContext",
packageName + ".DatadogSpark212Listener",
@@ -27,9 +32,22 @@ public String[] helperClassNames() {
packageName + ".SparkSQLUtils",
packageName + ".SparkSQLUtils$SparkPlanInfoForStage",
packageName + ".SparkSQLUtils$AccumulatorWithStage",
packageName + ".Spark212PlanUtils"
};
}

@Override
public String[] knownMatchingTypes() {
String[] res = new String[super.knownMatchingTypes().length + 1];
int idx = 0;
for (String match : super.knownMatchingTypes()) {
res[idx] = match;
idx++;
}
res[idx] = "org.apache.spark.sql.execution.SparkPlanInfo$";
return res;
}

@Override
public void methodAdvice(MethodTransformer transformer) {
super.methodAdvice(transformer);
@@ -40,6 +58,13 @@ public void methodAdvice(MethodTransformer transformer) {
.and(isDeclaredBy(named("org.apache.spark.SparkContext")))
.and(takesNoArguments()),
Spark212Instrumentation.class.getName() + "$InjectListener");

transformer.applyAdvice(
isMethod()
.and(named("fromSparkPlan"))
.and(takesArgument(0, named("org.apache.spark.sql.execution.SparkPlan")))
.and(isDeclaredBy(named("org.apache.spark.sql.execution.SparkPlanInfo$"))),
Spark212Instrumentation.class.getName() + "$SparkPlanInfoAdvice");
}

public static class InjectListener {
@@ -78,4 +103,24 @@ public static void enter(@Advice.This SparkContext sparkContext) {
sparkContext.listenerBus().addToSharedQueue(AbstractDatadogSparkListener.listener);
}
}

public static class SparkPlanInfoAdvice {
@Advice.OnMethodExit(suppress = Throwable.class, onThrowable = Throwable.class)
public static void exit(
@Advice.Return(readOnly = false) SparkPlanInfo planInfo,
@Advice.Argument(0) SparkPlan plan) {
if (planInfo.metadata().size() == 0) {
Spark212PlanUtils planUtils = new Spark212PlanUtils();
HashMap<String, String> args = new HashMap<>();
planInfo =
new SparkPlanInfo(
planInfo.nodeName(),
planInfo.simpleString(),
planInfo.children(),
args.$plus$plus(
JavaConverters.mapAsScalaMap(planUtils.extractFormattedProduct(plan))),
Contributor: Do we need args here? It seems like it would always be an empty map, so there isn't a need to concatenate.

Contributor (author): Same comment as above: to be frank, I was struggling with this a lot. I was trying to convert scala.collection.mutable.Map to scala.collection.immutable.Map, but I didn't quite know how to do that with all of the Scala implicits. Updated now to use toMap instead (figured out how to get the <:< implicit sorted properly). Let me know if this looks better! (160558e)

Contributor: Oops, didn't see the review earlier. Thanks for the update!

planInfo.metrics());
}
}
}
}
Spark212PlanUtils.java
@@ -0,0 +1,9 @@
package datadog.trace.instrumentation.spark;

import org.apache.spark.sql.catalyst.trees.TreeNode;

public class Spark212PlanUtils extends AbstractSparkPlanUtils {
public String getKey(int idx, TreeNode node) {
return String.format("_dd.unknown_key.%d", idx);
}
}
Spark213Instrumentation.java
@@ -8,15 +8,20 @@
import datadog.trace.api.Config;
import net.bytebuddy.asm.Advice;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.execution.SparkPlan;
import org.apache.spark.sql.execution.SparkPlanInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.collection.JavaConverters;
import scala.collection.immutable.HashMap;

@AutoService(InstrumenterModule.class)
public class Spark213Instrumentation extends AbstractSparkInstrumentation {
@Override
public String[] helperClassNames() {
return new String[] {
packageName + ".AbstractDatadogSparkListener",
packageName + ".AbstractSparkPlanUtils",
packageName + ".DatabricksParentContext",
packageName + ".OpenlineageParentContext",
packageName + ".DatadogSpark213Listener",
@@ -27,9 +32,22 @@ public String[] helperClassNames() {
packageName + ".SparkSQLUtils",
packageName + ".SparkSQLUtils$SparkPlanInfoForStage",
packageName + ".SparkSQLUtils$AccumulatorWithStage",
packageName + ".Spark213PlanUtils"
};
}

@Override
public String[] knownMatchingTypes() {
String[] res = new String[super.knownMatchingTypes().length + 1];
int idx = 0;
for (String match : super.knownMatchingTypes()) {
res[idx] = match;
idx++;
}
res[idx] = "org.apache.spark.sql.execution.SparkPlanInfo$";
return res;
}

@Override
public void methodAdvice(MethodTransformer transformer) {
super.methodAdvice(transformer);
@@ -40,6 +58,13 @@ public void methodAdvice(MethodTransformer transformer) {
.and(isDeclaredBy(named("org.apache.spark.SparkContext")))
.and(takesNoArguments()),
Spark213Instrumentation.class.getName() + "$InjectListener");

transformer.applyAdvice(
isMethod()
.and(named("fromSparkPlan"))
.and(takesArgument(0, named("org.apache.spark.sql.execution.SparkPlan")))
.and(isDeclaredBy(named("org.apache.spark.sql.execution.SparkPlanInfo$"))),
Spark213Instrumentation.class.getName() + "$SparkPlanInfoAdvice");
}

public static class InjectListener {
@@ -79,4 +104,23 @@ public static void enter(@Advice.This SparkContext sparkContext) {
sparkContext.listenerBus().addToSharedQueue(AbstractDatadogSparkListener.listener);
}
}

public static class SparkPlanInfoAdvice {
@Advice.OnMethodExit(suppress = Throwable.class, onThrowable = Throwable.class)
public static void exit(
@Advice.Return(readOnly = false) SparkPlanInfo planInfo,
@Advice.Argument(0) SparkPlan plan) {
if (planInfo.metadata().size() == 0) {
Contributor (author): By using the existing metadata on the DataSourceScanExec nodes, we open ourselves to a bit of inconsistency in the JSON parsing:

"meta": {
    "Format": "Parquet",
    "Batched": true,
    ...,
    "DataFilters": "[CASE WHEN PULocationID#28 IN (236,132,161) THEN true ELSE isnotnull(PULocationID#28) END]"
},

Specifically the lists are not quoted & escaped, which means when we read out the field it's treated as a string rather than a JSON native array. Ideally we would parse this ourselves and upsert it so we can control that formatting, but obviously there's a risk of the parsing going wrong and impacting something that actually uses the field. Leaning slightly towards keeping the formatting as-is in favour of not touching existing fields but happy to hear any other thoughts on this...

Spark213PlanUtils planUtils = new Spark213PlanUtils();
planInfo =
new SparkPlanInfo(
planInfo.nodeName(),
planInfo.simpleString(),
planInfo.children(),
HashMap.from(
JavaConverters.asScala(planUtils.extractFormattedProduct(plan)).toList()),
Contributor: Suggested change, replacing
    JavaConverters.asScala(planUtils.extractFormattedProduct(plan)).toList()),
with
    JavaConverters.asScala(planUtils.extractFormattedProduct(plan))),
Do we need to convert to a List first before converting to a HashMap?

Contributor (author): Good point, updated. (d4c8264)

planInfo.metrics());
}
}
}
}
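
The formatting concern raised in the author's comment above (DataFilters being stored as an unquoted, unescaped string) can be illustrated with a small Jackson sketch. This is not part of the PR; the class and variable names are made up for the example.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Collections;

class MetadataFormattingSketch {
  public static void main(String[] args) throws Exception {
    String filter =
        "CASE WHEN PULocationID#28 IN (236,132,161) THEN true ELSE isnotnull(PULocationID#28) END";

    // Spark's existing metadata renders the filter list as plain text, so consumers of the
    // meta field see one opaque string:
    String sparkStyle = "[" + filter + "]";

    // Values going through AbstractSparkPlanUtils.writeObjectToString are serialized by
    // Jackson, so a list becomes a native JSON array with quoted, escaped elements:
    String jacksonStyle = new ObjectMapper().writeValueAsString(Collections.singletonList(filter));

    System.out.println(sparkStyle);   // [CASE WHEN PULocationID#28 IN (236,132,161) THEN ...]
    System.out.println(jacksonStyle); // ["CASE WHEN PULocationID#28 IN (236,132,161) THEN ..."]
  }
}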
Spark213PlanUtils.java
@@ -0,0 +1,9 @@
package datadog.trace.instrumentation.spark;

import org.apache.spark.sql.catalyst.trees.TreeNode;

public class Spark213PlanUtils extends AbstractSparkPlanUtils {
public String getKey(int idx, TreeNode node) {
return node.productElementName(idx);
}
}
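
For context (not stated in the diff): Product#productElementName only exists from Scala 2.13 onward, which is why Spark213PlanUtils can emit real field names while Spark212PlanUtils falls back to _dd.unknown_key.N placeholders. A minimal, hypothetical sketch of the 2.13 behaviour, assuming a plan node is at hand:

import org.apache.spark.sql.catalyst.trees.TreeNode;

class PlanKeySketch {
  // Hypothetical helper, not part of the PR: for a Scala case class like Spark's
  // FilterExec(condition, child), index 0 resolves to "condition" on Scala 2.13,
  // whereas Spark212PlanUtils would return "_dd.unknown_key.0" for the same node.
  static String metaKey(TreeNode<?> node, int idx) {
    return new Spark213PlanUtils().getKey(idx, node);
  }
}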
AbstractSparkPlanUtils.java
@@ -0,0 +1,171 @@
package datadog.trace.instrumentation.spark;

import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.spark.Partitioner;
import org.apache.spark.sql.catalyst.expressions.Attribute;
import org.apache.spark.sql.catalyst.plans.JoinType;
import org.apache.spark.sql.catalyst.plans.QueryPlan;
import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode;
import org.apache.spark.sql.catalyst.plans.physical.Partitioning;
import org.apache.spark.sql.catalyst.trees.TreeNode;
import scala.Option;
import scala.collection.Iterable;
import scala.collection.JavaConverters;

// An extension of how Spark translates `SparkPlan`s to `SparkPlanInfo`, see here:
// https://github.com/apache/spark/blob/v3.5.0/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala#L54
public abstract class AbstractSparkPlanUtils {
private final int MAX_DEPTH = 4;
private final int MAX_LENGTH = 50;
private final ObjectMapper mapper =
new ObjectMapper().configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);

private final Class[] SAFE_CLASSES = {
Attribute.class, // simpleString appends data type, avoid by using toString
JoinType.class, // enum
Partitioner.class, // not a product or TreeNode
BroadcastMode.class, // not a product or TreeNode
maybeGetClass("org.apache.spark.sql.execution.exchange.ShuffleOrigin"), // enum (v3+)
maybeGetClass("org.apache.spark.sql.catalyst.optimizer.BuildSide"), // enum (v3+)
maybeGetClass(
"org.apache.spark.sql.execution.ShufflePartitionSpec"), // not a product or TreeNode (v3+)
};

public abstract String getKey(int idx, TreeNode node);

public Map<String, String> extractFormattedProduct(TreeNode plan) {
HashMap<String, String> result = new HashMap<>();
extractPlanProduct(plan, 0)
.forEach(
(key, value) -> {
result.put(key, writeObjectToString(value));
});
return result;
}

protected Map<String, Object> extractPlanProduct(TreeNode node, int depth) {
HashMap<String, Object> args = new HashMap<>();
HashMap<String, String> unparsed = new HashMap<>();

int i = 0;
for (Iterator<Object> it = JavaConverters.asJavaIterator(node.productIterator());
it.hasNext(); ) {
Object obj = it.next();

Object val = parsePlanProduct(obj, depth);
if (val != null) {
args.put(getKey(i, node), val);
} else {
unparsed.put(getKey(i, node), obj.getClass().getName());
}

i++;
}

if (unparsed.size() > 0) {
// For now, place what we can't parse here with the types so we're aware of them
args.put("_dd.unparsed", unparsed);
}
return args;
}

// Should only call on final values being written to `meta`
protected String writeObjectToString(Object value) {
try {
return mapper.writeValueAsString(value);
} catch (IOException e) {
return null;
}
}

protected Object parsePlanProduct(Object value, int depth) {
// This function MUST not arbitrarily serialize the object as we can't be sure what it is.
// A null return indicates object is unserializable, otherwise it should really only return
// valid JSON types (Array, Map, String, Boolean, Number, null)

if (value == null) {
return "null";
} else if (value instanceof String
|| value instanceof Boolean
|| Number.class.isInstance(value)) {
return value;
} else if (value instanceof Option) {
return parsePlanProduct(((Option) value).getOrElse(() -> "none"), depth);
} else if (value instanceof QueryPlan) {
// don't duplicate child nodes
return null;
} else if (value instanceof Iterable && depth < MAX_DEPTH) {
ArrayList<Object> list = new ArrayList<>();
for (Object item : JavaConverters.asJavaIterable((Iterable) value)) {
Object res = parsePlanProduct(item, depth + 1);
if (list.size() < MAX_LENGTH && res != null) {
list.add(res);
}
}
return list;
} else if (value instanceof Partitioning) {
if (value instanceof TreeNode && depth + 1 < MAX_DEPTH) {
HashMap<String, Object> inner = new HashMap<>();
inner.put(
value.getClass().getSimpleName(), extractPlanProduct(((TreeNode) value), depth + 2));
return inner;
} else {
return value.toString();
}
} else if (instanceOf(value, SAFE_CLASSES)) {
return value.toString();
} else if (value instanceof TreeNode) {
// fallback case, leave at bottom
return getSimpleString((TreeNode) value);
}

return null;
}

private String getSimpleString(TreeNode value) {
try {
// in Spark v3+, the signature of `simpleString` includes an int parameter for `maxFields`
return TreeNode.class
.getDeclaredMethod("simpleString", new Class[] {int.class})
.invoke(value, MAX_LENGTH)
.toString();
} catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException exception) {
try {
// Attempt the Spark v2 `simpleString` signature
return TreeNode.class.getDeclaredMethod("simpleString").invoke(value).toString();
} catch (NoSuchMethodException
| IllegalAccessException
| InvocationTargetException innerException) {
}

return null;
}
}

// Use reflection rather than native `instanceof` for classes added in later Spark versions
private boolean instanceOf(Object value, Class[] classes) {
for (Class cls : classes) {
if (cls != null && cls.isInstance(value)) {
return true;
}
}

return false;
}

private Class maybeGetClass(String cls) {
try {
return Class.forName(cls);
} catch (ClassNotFoundException e) {
return null;
}
}
}
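
A minimal usage sketch of the new helper, assuming a SparkPlan instance is available; the wrapper class and method name below are illustrative and not part of the PR (the actual call site is SparkPlanInfoAdvice in the instrumentations above).

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.Map;
import org.apache.spark.sql.execution.SparkPlan;

class PlanMetaSketch {
  // Extract the plan node's product fields as a flat String-to-String map (values are
  // already JSON fragments) and serialize the whole map, roughly what ends up as the
  // enriched metadata on the rebuilt SparkPlanInfo.
  static String planMetaJson(SparkPlan plan) throws IOException {
    AbstractSparkPlanUtils utils = new Spark213PlanUtils(); // or Spark212PlanUtils on Scala 2.12
    Map<String, String> meta = utils.extractFormattedProduct(plan);
    return new ObjectMapper().writeValueAsString(meta);
  }
}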