|
1 | 1 | use crate::prelude::*; |
2 | 2 |
|
3 | | -use futures::future::{join, join_all, try_join_all}; |
4 | | -use itertools::Itertools; |
| 3 | +use futures::future::try_join_all; |
5 | 4 | use log::error; |
6 | | -use serde::Serialize; |
7 | 5 | use sqlx::PgPool; |
8 | 6 | use std::collections::{HashMap, HashSet}; |
9 | | -use std::sync::atomic::{AtomicUsize, Ordering::Relaxed}; |
| 7 | +use std::sync::atomic::Ordering::Relaxed; |
10 | 8 |
|
11 | 9 | use super::db_tracking::{self, read_source_tracking_info_for_processing, TrackedTargetKey}; |
12 | 10 | use super::db_tracking_setup; |
| 11 | +use super::evaluator::{evaluate_source_entry, ScopeValueBuilder}; |
13 | 12 | use super::memoization::{EvaluationMemory, EvaluationMemoryOptions, StoredMemoizationInfo}; |
| 13 | +use super::stats; |
| 14 | + |
14 | 15 | use crate::base::schema; |
15 | 16 | use crate::base::value::{self, FieldValues, KeyValue}; |
16 | 17 | use crate::builder::plan::*; |
17 | | -use crate::ops::interface::{ |
18 | | - ExportTargetMutation, ExportTargetUpsertEntry, Ordinal, SourceExecutorListOptions, |
19 | | -}; |
| 18 | +use crate::ops::interface::{ExportTargetMutation, ExportTargetUpsertEntry, Ordinal}; |
20 | 19 | use crate::utils::db::WriteAction; |
21 | 20 | use crate::utils::fingerprint::{Fingerprint, Fingerprinter}; |
22 | 21 |
|
23 | | -use super::evaluator::{evaluate_source_entry, ScopeValueBuilder}; |
24 | | - |
25 | | -#[derive(Debug, Serialize, Default)] |
26 | | -pub struct UpdateStats { |
27 | | - pub num_skipped: AtomicUsize, |
28 | | - pub num_insertions: AtomicUsize, |
29 | | - pub num_deletions: AtomicUsize, |
30 | | - pub num_repreocesses: AtomicUsize, |
31 | | - pub num_errors: AtomicUsize, |
32 | | -} |
33 | | - |
34 | | -impl std::fmt::Display for UpdateStats { |
35 | | - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
36 | | - let num_skipped = self.num_skipped.load(Relaxed); |
37 | | - if num_skipped > 0 { |
38 | | - write!(f, "{} rows skipped", num_skipped)?; |
39 | | - } |
40 | | - |
41 | | - let num_insertions = self.num_insertions.load(Relaxed); |
42 | | - let num_deletions = self.num_deletions.load(Relaxed); |
43 | | - let num_reprocesses = self.num_repreocesses.load(Relaxed); |
44 | | - let num_source_rows = num_insertions + num_deletions + num_reprocesses; |
45 | | - if num_source_rows > 0 { |
46 | | - if num_skipped > 0 { |
47 | | - write!(f, ", ")?; |
48 | | - } |
49 | | - write!(f, "{num_source_rows} source rows processed",)?; |
50 | | - |
51 | | - let num_errors = self.num_errors.load(Relaxed); |
52 | | - if num_errors > 0 { |
53 | | - write!(f, " with {num_errors} ERRORS",)?; |
54 | | - } |
55 | | - write!( |
56 | | - f, |
57 | | - ": {num_insertions} added, {num_deletions} removed, {num_reprocesses} repocessed", |
58 | | - )?; |
59 | | - } |
60 | | - Ok(()) |
61 | | - } |
62 | | -} |
63 | | - |
64 | | -#[derive(Debug, Serialize)] |
65 | | -pub struct SourceUpdateInfo { |
66 | | - pub source_name: String, |
67 | | - pub stats: UpdateStats, |
68 | | -} |
69 | | - |
70 | | -impl std::fmt::Display for SourceUpdateInfo { |
71 | | - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
72 | | - write!(f, "{}: {}", self.source_name, self.stats) |
73 | | - } |
74 | | -} |
75 | | - |
76 | | -#[derive(Debug, Serialize)] |
77 | | -pub struct IndexUpdateInfo { |
78 | | - pub sources: Vec<SourceUpdateInfo>, |
79 | | -} |
80 | | - |
81 | | -impl std::fmt::Display for IndexUpdateInfo { |
82 | | - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
83 | | - for source in self.sources.iter() { |
84 | | - writeln!(f, "{}", source)?; |
85 | | - } |
86 | | - Ok(()) |
87 | | - } |
88 | | -} |
89 | | - |
90 | 22 | pub fn extract_primary_key( |
91 | 23 | primary_key_def: &AnalyzedPrimaryKeyDef, |
92 | 24 | record: &FieldValues, |
@@ -470,14 +402,14 @@ pub async fn evaluate_source_entry_with_memory( |
470 | 402 | Ok(Some(output)) |
471 | 403 | } |
472 | 404 |
|
473 | | -pub async fn update_source_entry( |
| 405 | +pub async fn update_source_row( |
474 | 406 | plan: &ExecutionPlan, |
475 | 407 | source_op: &AnalyzedSourceOp, |
476 | 408 | schema: &schema::DataSchema, |
477 | 409 | key: &value::KeyValue, |
478 | 410 | only_for_deletion: bool, |
479 | 411 | pool: &PgPool, |
480 | | - stats: &UpdateStats, |
| 412 | + stats: &stats::UpdateStats, |
481 | 413 | ) -> Result<()> { |
482 | 414 | let source_key_json = serde_json::to_value(key)?; |
483 | 415 | let process_timestamp = chrono::Utc::now(); |
@@ -617,85 +549,18 @@ pub async fn update_source_entry( |
617 | 549 | Ok(()) |
618 | 550 | } |
619 | 551 |
|
620 | | -async fn update_source_entry_with_err_handling( |
| 552 | +pub(super) async fn update_source_row_with_err_handling( |
621 | 553 | plan: &ExecutionPlan, |
622 | 554 | source_op: &AnalyzedSourceOp, |
623 | 555 | schema: &schema::DataSchema, |
624 | 556 | key: &value::KeyValue, |
625 | 557 | only_for_deletion: bool, |
626 | 558 | pool: &PgPool, |
627 | | - stats: &UpdateStats, |
| 559 | + stats: &stats::UpdateStats, |
628 | 560 | ) { |
629 | | - let r = update_source_entry(plan, source_op, schema, key, only_for_deletion, pool, stats).await; |
| 561 | + let r = update_source_row(plan, source_op, schema, key, only_for_deletion, pool, stats).await; |
630 | 562 | if let Err(e) = r { |
631 | 563 | stats.num_errors.fetch_add(1, Relaxed); |
632 | 564 | error!("{:?}", e.context("Error in indexing a source row")); |
633 | 565 | } |
634 | 566 | } |
635 | | - |
636 | | -async fn update_source( |
637 | | - source_name: &str, |
638 | | - plan: &ExecutionPlan, |
639 | | - source_op: &AnalyzedSourceOp, |
640 | | - schema: &schema::DataSchema, |
641 | | - pool: &PgPool, |
642 | | -) -> Result<SourceUpdateInfo> { |
643 | | - let existing_keys_json = db_tracking::list_source_tracking_keys( |
644 | | - source_op.source_id, |
645 | | - &plan.tracking_table_setup, |
646 | | - pool, |
647 | | - ) |
648 | | - .await?; |
649 | | - |
650 | | - let mut keys = Vec::new(); |
651 | | - let mut rows_stream = source_op.executor.list(SourceExecutorListOptions { |
652 | | - include_ordinal: false, |
653 | | - }); |
654 | | - while let Some(rows) = rows_stream.next().await { |
655 | | - keys.extend(rows?.into_iter().map(|row| row.key)); |
656 | | - } |
657 | | - |
658 | | - let stats = UpdateStats::default(); |
659 | | - let upsert_futs = join_all(keys.iter().map(|key| { |
660 | | - update_source_entry_with_err_handling(plan, source_op, schema, key, false, pool, &stats) |
661 | | - })); |
662 | | - let deleted_keys = existing_keys_json |
663 | | - .into_iter() |
664 | | - .map(|existing_key_json| { |
665 | | - value::Value::<value::ScopeValue>::from_json( |
666 | | - existing_key_json.source_key, |
667 | | - &source_op.primary_key_type, |
668 | | - )? |
669 | | - .as_key() |
670 | | - }) |
671 | | - .filter_ok(|existing_key| !keys.contains(existing_key)) |
672 | | - .collect::<Result<Vec<_>>>()?; |
673 | | - let delete_futs = join_all(deleted_keys.iter().map(|key| { |
674 | | - update_source_entry_with_err_handling(plan, source_op, schema, key, true, pool, &stats) |
675 | | - })); |
676 | | - join(upsert_futs, delete_futs).await; |
677 | | - |
678 | | - Ok(SourceUpdateInfo { |
679 | | - source_name: source_name.to_string(), |
680 | | - stats, |
681 | | - }) |
682 | | -} |
683 | | - |
684 | | -pub async fn update( |
685 | | - plan: &ExecutionPlan, |
686 | | - schema: &schema::DataSchema, |
687 | | - pool: &PgPool, |
688 | | -) -> Result<IndexUpdateInfo> { |
689 | | - let source_update_stats = try_join_all( |
690 | | - plan.source_ops |
691 | | - .iter() |
692 | | - .map(|source_op| async move { |
693 | | - update_source(source_op.name.as_str(), plan, source_op, schema, pool).await |
694 | | - }) |
695 | | - .collect::<Vec<_>>(), |
696 | | - ) |
697 | | - .await?; |
698 | | - Ok(IndexUpdateInfo { |
699 | | - sources: source_update_stats, |
700 | | - }) |
701 | | -} |
0 commit comments