|
18 | 18 |
|
19 | 19 | import com.google.cloud.bigquery.BigQuery; |
20 | 20 | import com.google.cloud.bigquery.DatasetId; |
| 21 | +import com.google.cloud.bigquery.Job; |
| 22 | +import com.google.cloud.bigquery.JobStatistics; |
| 23 | +import com.google.cloud.bigquery.QueryStage; |
21 | 24 | import com.google.cloud.bigquery.StandardTableDefinition; |
22 | 25 | import com.google.cloud.bigquery.Table; |
23 | 26 | import com.google.cloud.bigquery.TableDefinition; |
24 | 27 | import com.google.cloud.bigquery.TableId; |
25 | 28 | import com.google.cloud.bigquery.TableInfo; |
| 29 | +import com.google.gson.Gson; |
26 | 30 | import io.cdap.cdap.api.data.schema.Schema; |
27 | 31 | import io.cdap.cdap.etl.api.engine.sql.SQLEngineException; |
28 | 32 | import io.cdap.cdap.etl.api.join.JoinCondition; |
|
41 | 45 | import java.util.Map; |
42 | 46 | import java.util.UUID; |
43 | 47 | import java.util.concurrent.TimeUnit; |
| 48 | +import java.util.stream.Collectors; |
44 | 49 | import javax.annotation.Nullable; |
45 | 50 |
|
46 | 51 | /** |
|
49 | 54 | public class BigQuerySQLEngineUtils { |
50 | 55 |
|
51 | 56 | private static final Logger LOG = LoggerFactory.getLogger(BigQuerySQLEngineUtils.class); |
| 57 | + private static final Gson GSON = new Gson(); |
52 | 58 |
|
53 | 59 | public static final String GCS_PATH_FORMAT = BigQuerySinkUtils.GS_PATH_FORMAT + "/%s"; |
54 | 60 | public static final String BQ_TABLE_NAME_FORMAT = "%s_%s"; |
@@ -218,7 +224,7 @@ public static void validateOnExpressionJoinCondition(JoinCondition.OnExpression |
218 | 224 |
|
219 | 225 | /** |
220 | 226 | * Validates stages for a Join on Key operation |
221 | | - * |
| 227 | + * <p> |
222 | 228 | * TODO: Update logic once BQ SQL engine joins support multiple outer join tables |
223 | 229 | * |
224 | 230 | * @param joinDefinition Join Definition to validate |
@@ -292,4 +298,98 @@ public static Map<String, String> getJobTags(String operation) { |
292 | 298 | labels.put("pushdown_operation", operation); |
293 | 299 | return Collections.unmodifiableMap(labels); |
294 | 300 | } |
| 301 | + |
| 302 | + /** |
| 303 | + * Logs information about a BigQUery Job execution using a specified Logger instance |
| 304 | + * |
| 305 | + * @param job BigQuery Job |
| 306 | + */ |
| 307 | + public static void logJobMetrics(Job job) { |
| 308 | + // Ensure job has statistics information |
| 309 | + if (job.getStatistics() == null) { |
| 310 | + LOG.warn("No statistics were found for BigQuery job {}", job.getJobId()); |
| 311 | + } |
| 312 | + |
| 313 | + String startTimeStr = getISODateTimeString(job.getStatistics().getStartTime()); |
| 314 | + String endTimeStr = getISODateTimeString(job.getStatistics().getEndTime()); |
| 315 | + String executionTimeStr = getExecutionTimeString(job.getStatistics().getStartTime(), |
| 316 | + job.getStatistics().getEndTime()); |
| 317 | + |
| 318 | + // Print detailed query statistics if available |
| 319 | + if (job.getStatistics() instanceof JobStatistics.QueryStatistics) { |
| 320 | + JobStatistics.QueryStatistics queryStatistics = (JobStatistics.QueryStatistics) job.getStatistics(); |
| 321 | + LOG.info("Metrics for job {}:\n" + |
| 322 | + " Start: {} ,\n" + |
| 323 | + " End: {} ,\n" + |
| 324 | + " Execution time: {} ,\n" + |
| 325 | + " Processed Bytes: {} ,\n" + |
| 326 | + " Billed Bytes: {} ,\n" + |
| 327 | + " Total Slot ms: {} ,\n" + |
| 328 | + " Records per stage (read/write): {}", |
| 329 | + job.getJobId().getJob(), |
| 330 | + startTimeStr, |
| 331 | + endTimeStr, |
| 332 | + executionTimeStr, |
| 333 | + queryStatistics.getTotalBytesProcessed(), |
| 334 | + queryStatistics.getTotalBytesBilled(), |
| 335 | + queryStatistics.getTotalSlotMs(), |
| 336 | + getQueryStageRecordCounts(queryStatistics.getQueryPlan())); |
| 337 | + |
| 338 | + if (LOG.isTraceEnabled()) { |
| 339 | + LOG.trace("Additional Metrics for job {}:\n" + |
| 340 | + " Query Plan: {} ,\n" + |
| 341 | + " Query Timeline: {} \n", |
| 342 | + job.getJobId().getJob(), |
| 343 | + GSON.toJson(queryStatistics.getQueryPlan()), |
| 344 | + GSON.toJson(queryStatistics.getTimeline())); |
| 345 | + } |
| 346 | + |
| 347 | + return; |
| 348 | + } |
| 349 | + |
| 350 | + // Print basic metrics |
| 351 | + JobStatistics statistics = job.getStatistics(); |
| 352 | + LOG.info("Metrics for job: {}\n" + |
| 353 | + " Start: {} ,\n" + |
| 354 | + " End: {} ,\n" + |
| 355 | + " Execution time: {}", |
| 356 | + job.getJobId().getJob(), |
| 357 | + startTimeStr, |
| 358 | + endTimeStr, |
| 359 | + executionTimeStr); |
| 360 | + } |
| 361 | + |
| 362 | + private static String getISODateTimeString(Long epoch) { |
| 363 | + if (epoch == null) { |
| 364 | + return "N/A"; |
| 365 | + } |
| 366 | + |
| 367 | + return Instant.ofEpochMilli(epoch).toString(); |
| 368 | + } |
| 369 | + |
| 370 | + private static String getExecutionTimeString(Long startEpoch, Long endEpoch) { |
| 371 | + if (startEpoch == null || endEpoch == null) { |
| 372 | + return "N/A"; |
| 373 | + } |
| 374 | + |
| 375 | + return (endEpoch - startEpoch) + " ms"; |
| 376 | + } |
| 377 | + |
| 378 | + private static String getQueryStageRecordCounts(List<QueryStage> queryPlan) { |
| 379 | + if (queryPlan == null || queryPlan.isEmpty()) { |
| 380 | + return "N/A"; |
| 381 | + } |
| 382 | + |
| 383 | + return queryPlan.stream() |
| 384 | + .map(qs -> formatRecordCount(qs.getRecordsRead()) + "/" + formatRecordCount(qs.getRecordsWritten())) |
| 385 | + .collect(Collectors.joining(" , ", "[ ", " ]")); |
| 386 | + } |
| 387 | + |
| 388 | + private static String formatRecordCount(Long val) { |
| 389 | + if (val == null) { |
| 390 | + return "N/A"; |
| 391 | + } |
| 392 | + |
| 393 | + return val.toString(); |
| 394 | + } |
295 | 395 | } |
0 commit comments