|
4 | 4 | import com.fasterxml.jackson.databind.DeserializationFeature; |
5 | 5 | import com.fasterxml.jackson.databind.ObjectMapper; |
6 | 6 | import datadog.trace.bootstrap.instrumentation.api.AgentSpan; |
| 7 | +import de.thetaphi.forbiddenapis.SuppressForbidden; |
7 | 8 | import java.io.ByteArrayOutputStream; |
8 | 9 | import java.io.IOException; |
| 10 | +import java.lang.invoke.MethodHandle; |
| 11 | +import java.lang.invoke.MethodHandles; |
| 12 | +import java.lang.invoke.MethodType; |
9 | 13 | import java.nio.charset.StandardCharsets; |
10 | 14 | import java.util.ArrayList; |
11 | 15 | import java.util.Collection; |
|
14 | 18 | import java.util.Map; |
15 | 19 | import java.util.Set; |
16 | 20 | import org.apache.spark.scheduler.AccumulableInfo; |
| 21 | +import org.apache.spark.sql.catalyst.analysis.NamedRelation; |
17 | 22 | import org.apache.spark.sql.catalyst.plans.logical.AppendData; |
18 | 23 | import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; |
19 | 24 | import org.apache.spark.sql.execution.SparkPlanInfo; |
20 | 25 | import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation; |
21 | 26 | import org.apache.spark.sql.execution.metric.SQLMetricInfo; |
| 27 | +import org.apache.spark.sql.types.StructType; |
22 | 28 | import org.slf4j.Logger; |
23 | 29 | import org.slf4j.LoggerFactory; |
24 | 30 | import scala.PartialFunction; |
|
28 | 34 | public class SparkSQLUtils { |
29 | 35 | private static final Logger log = LoggerFactory.getLogger(SparkSQLUtils.class); |
30 | 36 |
|
  // Reflectively-resolved Spark classes and accessors. DataSourceV2Relation and the
  // connector Table API are not present in every supported Spark version, so they are
  // looked up once in the static initializer below; any of these may be null when the
  // running Spark version does not provide them.
  private static final Class<?> dataSourceV2RelationClass;
  // Handle for Table.schema() -> StructType; null if resolution failed.
  private static final MethodHandle schemaMethod;
  // Handle for Table.name() -> String; null if resolution failed.
  private static final MethodHandle nameMethod;
  // Handle for Table.properties() -> Map; null if resolution failed.
  private static final MethodHandle propertiesMethod;

  // org.apache.spark.sql.connector.catalog.Table, or null when absent from the classpath.
  private static final Class<?> tableClass;
  /**
   * Loads {@code DataSourceV2Relation} by name so this file compiles and runs on Spark
   * versions where the class may be absent.
   *
   * @return the DataSourceV2Relation class
   * @throws ClassNotFoundException when the running Spark version does not ship the class
   */
  @SuppressForbidden // Using reflection to avoid splitting the instrumentation once more
  private static Class<?> findDataSourceV2Relation() throws ClassNotFoundException {
    return Class.forName("org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation");
  }
| 48 | + |
  /**
   * Loads the connector-catalog {@code Table} interface by name; it only exists on Spark
   * versions that include the DataSource V2 connector API.
   *
   * @return the Table interface class
   * @throws ClassNotFoundException when the running Spark version does not ship the class
   */
  @SuppressForbidden // Using reflection to avoid splitting the instrumentation once more
  private static Class<?> findTable() throws ClassNotFoundException {
    return Class.forName("org.apache.spark.sql.connector.catalog.Table");
  }
| 53 | + |
| 54 | + static { |
| 55 | + Class<?> relationClassFound = null; |
| 56 | + Class<?> tableClassFound = null; |
| 57 | + |
| 58 | + MethodHandle nameMethodFound = null; |
| 59 | + MethodHandle schemaMethodFound = null; |
| 60 | + MethodHandle propertiesMethodFound = null; |
| 61 | + |
| 62 | + try { |
| 63 | + MethodHandles.Lookup lookup = MethodHandles.lookup(); |
| 64 | + |
| 65 | + relationClassFound = findDataSourceV2Relation(); |
| 66 | + tableClassFound = findTable(); |
| 67 | + |
| 68 | + schemaMethodFound = |
| 69 | + lookup.findVirtual(tableClassFound, "schema", MethodType.methodType(StructType.class)); |
| 70 | + nameMethodFound = |
| 71 | + lookup.findVirtual(tableClassFound, "name", MethodType.methodType(String.class)); |
| 72 | + propertiesMethodFound = |
| 73 | + lookup.findVirtual(tableClassFound, "properties", MethodType.methodType(Map.class)); |
| 74 | + |
| 75 | + } catch (ClassNotFoundException | NoSuchMethodException | IllegalAccessException ignored) { |
| 76 | + } |
| 77 | + |
| 78 | + dataSourceV2RelationClass = relationClassFound; |
| 79 | + tableClass = tableClassFound; |
| 80 | + schemaMethod = schemaMethodFound; |
| 81 | + nameMethod = nameMethodFound; |
| 82 | + propertiesMethod = propertiesMethodFound; |
| 83 | + } |
| 84 | + |
31 | 85 | public static void addSQLPlanToStageSpan( |
32 | 86 | AgentSpan span, |
33 | 87 | SparkPlanInfo plan, |
@@ -264,31 +318,88 @@ public boolean isDefinedAt(LogicalPlan x) { |
264 | 318 |
|
265 | 319 | @Override |
266 | 320 | public LineageDataset apply(LogicalPlan x) { |
| 321 | + if (dataSourceV2RelationClass != null && dataSourceV2RelationClass.isInstance(x)) { |
| 322 | + log.info( |
| 323 | + "class {} is instance of {}", |
| 324 | + x.getClass().getName(), |
| 325 | + dataSourceV2RelationClass.getName()); |
| 326 | + return parseDataSourceV2Relation(x, "input"); |
| 327 | + } else if (x instanceof AppendData) { |
| 328 | + log.info( |
| 329 | + "class {} is instance of {}", x.getClass().getName(), AppendData.class.getName()); |
| 330 | + AppendData appendData = (AppendData) x; |
| 331 | + NamedRelation table = appendData.table(); |
| 332 | + if (dataSourceV2RelationClass != null && dataSourceV2RelationClass.isInstance(table)) { |
| 333 | + log.info( |
| 334 | + "class {} is instance of {}", |
| 335 | + table.getClass().getName(), |
| 336 | + dataSourceV2RelationClass.getName()); |
| 337 | + return parseDataSourceV2Relation(table, "output"); |
| 338 | + } |
| 339 | + } |
| 340 | + return null; |
| 341 | + } |
| 342 | + |
| 343 | + private LineageDataset parseDataSourceV2Relation(Object logicalPlan, String datasetType) { |
267 | 344 | try { |
268 | | - if (x instanceof DataSourceV2Relation) { |
269 | | - DataSourceV2Relation relation = (DataSourceV2Relation) x; |
270 | | - return new LineageDataset( |
271 | | - relation.table().name(), |
272 | | - relation.schema().json(), |
273 | | - "", |
274 | | - relation.table().properties().toString(), |
275 | | - "input"); |
276 | | - } else if (x instanceof AppendData) { |
277 | | - AppendData appendData = (AppendData) x; |
278 | | - DataSourceV2Relation relation = (DataSourceV2Relation) appendData.table(); |
279 | | - return new LineageDataset( |
280 | | - relation.table().name(), |
281 | | - relation.schema().json(), |
282 | | - "", |
283 | | - relation.table().properties().toString(), |
284 | | - "output"); |
| 345 | + String tableName = null; |
| 346 | + String tableSchema = null; |
| 347 | + String properties = null; |
| 348 | + |
| 349 | + if (logicalPlan.getClass().getMethod("table") == null) { |
| 350 | + log.info( |
| 351 | + "method table does not exist for {}, cannot parse current LogicalPlan", |
| 352 | + logicalPlan.getClass().getName()); |
| 353 | + return null; |
| 354 | + } |
| 355 | + |
| 356 | + Object table = logicalPlan.getClass().getMethod("table").invoke(logicalPlan); |
| 357 | + if (table == null) { |
| 358 | + log.info( |
| 359 | + "table is null for {}, cannot parse current LogicalPlan", |
| 360 | + logicalPlan.getClass().getName()); |
| 361 | + return null; |
| 362 | + } |
| 363 | + |
| 364 | + if (tableClass == null || !tableClass.isInstance(table)) { |
| 365 | + log.info("table is not instance of a Table class, cannot parse current LogicalPlan"); |
| 366 | + return null; |
| 367 | + } |
| 368 | + |
| 369 | + if (table.getClass().getMethod("name") != null) { |
| 370 | + tableName = (String) nameMethod.invoke(table); |
| 371 | + log.info( |
| 372 | + "method name exists for {} with table name {}", |
| 373 | + table.getClass().getName(), |
| 374 | + tableName); |
| 375 | + } else { |
| 376 | + log.info("method name does not exist for {}", table.getClass().getName()); |
285 | 377 | } |
286 | | - } catch (Exception e) { |
287 | | - log.debug("Error while converting logical plan to dataset", e); |
| 378 | + |
| 379 | + if (table.getClass().getMethod("schema") != null) { |
| 380 | + StructType schema = (StructType) schemaMethod.invoke(table); |
| 381 | + log.info( |
| 382 | + "method schema exists for {} with schema {}", table.getClass().getName(), schema); |
| 383 | + tableSchema = schema.json(); |
| 384 | + } else { |
| 385 | + log.info("method schema does not exist for {}", table.getClass().getName()); |
| 386 | + } |
| 387 | + |
| 388 | + if (table.getClass().getMethod("properties") != null) { |
| 389 | + Map<String, String> propertyMap = |
| 390 | + (Map<String, String>) propertiesMethod.invoke(table); |
| 391 | + properties = propertyMap.toString(); |
| 392 | + |
| 393 | + log.info("method properties found with content of {}", properties); |
| 394 | + } else { |
| 395 | + log.info("method properties does not exist for {}", table.getClass().getName()); |
| 396 | + } |
| 397 | + |
| 398 | + return new LineageDataset(tableName, tableSchema, "", properties, datasetType); |
| 399 | + } catch (Throwable ignored) { |
| 400 | + log.info("Error while converting logical plan to dataset", ignored); |
288 | 401 | return null; |
289 | 402 | } |
290 | | - |
291 | | - return null; |
292 | 403 | } |
293 | 404 | }; |
294 | 405 | } |
0 commit comments