@@ -19,21 +19,17 @@ use std::{
 };
 
 use diesel::{
-    deserialize::FromSql,
     dsl::sql,
     insert_into,
-    pg::Pg,
     r2d2::{ConnectionManager, PooledConnection},
-    select,
-    serialize::{Output, ToSql},
-    sql_query,
+    select, sql_query,
     sql_types::{BigInt, Integer},
     update, Connection as _, ExpressionMethods, OptionalExtension, PgConnection, QueryDsl,
     RunQueryDsl,
 };
 use graph::{
     constraint_violation,
-    prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError, ENV_VARS},
+    prelude::{info, o, warn, BlockNumber, BlockPtr, Logger, StoreError},
     schema::EntityType,
 };
 use itertools::Itertools;
@@ -43,17 +39,11 @@ use crate::{
     dynds::DataSourcesTable,
     primary::{DeploymentId, Site},
     relational::index::IndexList,
+    vid_batcher::AdaptiveBatchSize,
 };
 use crate::{connection_pool::ConnectionPool, relational::Layout};
 use crate::{relational::Table, relational_queries as rq};
 
-/// The initial batch size for tables that do not have an array column
-const INITIAL_BATCH_SIZE: i64 = 10_000;
-/// The initial batch size for tables that do have an array column; those
-/// arrays can be large and large arrays will slow down copying a lot. We
-/// therefore tread lightly in that case
-const INITIAL_BATCH_SIZE_LIST: i64 = 100;
-
 const LOG_INTERVAL: Duration = Duration::from_secs(3 * 60);
 
 /// If replicas are lagging by more than this, the copying code will pause
@@ -299,51 +289,6 @@ pub(crate) fn source(
         .map_err(StoreError::from)
 }
 
-/// Track the desired size of a batch in such a way that doing the next
-/// batch gets close to TARGET_DURATION for the time it takes to copy one
-/// batch, but don't step up the size by more than 2x at once
-#[derive(Debug, Queryable)]
-pub(crate) struct AdaptiveBatchSize {
-    pub size: i64,
-}
-
-impl AdaptiveBatchSize {
-    pub fn new(table: &Table) -> Self {
-        let size = if table.columns.iter().any(|col| col.is_list()) {
-            INITIAL_BATCH_SIZE_LIST
-        } else {
-            INITIAL_BATCH_SIZE
-        };
-
-        Self { size }
-    }
-
-    // adjust batch size by trying to extrapolate in such a way that we
-    // get close to TARGET_DURATION for the time it takes to copy one
-    // batch, but don't step up batch_size by more than 2x at once
-    pub fn adapt(&mut self, duration: Duration) {
-        // Avoid division by zero
-        let duration = duration.as_millis().max(1);
-        let new_batch_size = self.size as f64
-            * ENV_VARS.store.batch_target_duration.as_millis() as f64
-            / duration as f64;
-        self.size = (2 * self.size).min(new_batch_size.round() as i64);
-    }
-}
-
-impl ToSql<BigInt, Pg> for AdaptiveBatchSize {
-    fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, Pg>) -> diesel::serialize::Result {
-        <i64 as ToSql<BigInt, Pg>>::to_sql(&self.size, out)
-    }
-}
-
-impl FromSql<BigInt, Pg> for AdaptiveBatchSize {
-    fn from_sql(bytes: diesel::pg::PgValue) -> diesel::deserialize::Result<Self> {
-        let size = <i64 as FromSql<BigInt, Pg>>::from_sql(bytes)?;
-        Ok(AdaptiveBatchSize { size })
-    }
-}
-
 /// A helper to copy entities from one table to another in batches that are
 /// small enough to not interfere with the rest of the operations happening
 /// in the database. The `src` and `dst` table must have the same structure
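Note on the removed `AdaptiveBatchSize` (now provided by the `vid_batcher` module): the adaptation rule extrapolates linearly from the last batch's duration toward the configured target duration, capped at doubling per step. Below is a minimal, self-contained sketch of that rule; the `TARGET_DURATION` constant is a hard-coded stand-in for `ENV_VARS.store.batch_target_duration` (5 minutes is an assumption for illustration, the real value comes from configuration), and `BatchSize` is a simplified stand-in type.

```rust
use std::time::Duration;

// Stand-in for ENV_VARS.store.batch_target_duration; the actual value is
// read from configuration. 5 minutes is an assumption for this sketch.
const TARGET_DURATION: Duration = Duration::from_secs(5 * 60);

struct BatchSize {
    size: i64,
}

impl BatchSize {
    /// Extrapolate linearly: if the last batch took `duration`, scale the
    /// size so the next batch should take about TARGET_DURATION, but never
    /// grow by more than 2x in a single step.
    fn adapt(&mut self, duration: Duration) {
        let millis = duration.as_millis().max(1); // avoid division by zero
        let extrapolated =
            self.size as f64 * TARGET_DURATION.as_millis() as f64 / millis as f64;
        self.size = (2 * self.size).min(extrapolated.round() as i64);
    }
}

fn main() {
    // A 100-row batch that finished in 10s extrapolates to 3_000 rows for a
    // 5 minute target, but the 2x cap limits the next batch to 200 rows.
    let mut batch = BatchSize { size: 100 };
    batch.adapt(Duration::from_secs(10));
    assert_eq!(batch.size, 200);

    // A batch that overshoots the target shrinks without any cap: 400 rows
    // taking 10 minutes extrapolates straight down to 200 rows.
    let mut slow = BatchSize { size: 400 };
    slow.adapt(Duration::from_secs(10 * 60));
    assert_eq!(slow.size, 200);
}
```

Capping growth at 2x keeps one unusually fast batch from ballooning the next one, while shrinking is uncapped so a slow batch backs off immediately.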