@@ -1224,6 +1224,45 @@ begin
12241224end;
12251225$outer_migration_block$;
12261226
1227+ -- -----------------------------------------------------------------------------
1228+ -- 032-split-transaction-support.sql
do $outer_migration_block$ /*032-split-transaction-support.sql*/
declare
    -- _sql:            the wrapped statement that actually runs the migration
    -- _migration:      the existing row in ai.pgai_lib_migration for this migration, if any
    -- _migration_name: key under which this migration is recorded
    -- _migration_body: migration text; it is stored verbatim and compared against
    --                  the stored copy on later runs, so it must not be reformatted
    --                  or commented once it has been applied anywhere
    _sql text;
    _migration record;
    _migration_name text = $migration_name$032-split-transaction-support.sql$migration_name$;
    _migration_body text =
$migration_body$
-- rename loading_retries and loading_retry_after for all existing queue tables
do language plpgsql $block$
declare
    _vectorizer record;
begin
    for _vectorizer in select queue_schema, queue_table from ai.vectorizer
    loop
        execute format('alter table %I.%I rename column loading_retries to attempts', _vectorizer.queue_schema, _vectorizer.queue_table);
        execute format('alter table %I.%I rename column loading_retry_after to retry_after', _vectorizer.queue_schema, _vectorizer.queue_table);
    end loop;
end;
$block$;

$migration_body$;
begin
    select * into _migration from ai.pgai_lib_migration where "name" operator(pg_catalog.=) _migration_name;
    -- FOUND is set by SELECT INTO. It is used instead of `_migration is not null`
    -- because a record only tests as NOT NULL when *every* one of its fields is
    -- non-null, which would misreport an existing row that has any null column.
    if found then
        -- PL/pgSQL RAISE substitutes a bare `%`; writing `%s` would print the
        -- name followed by a stray literal "s".
        raise notice 'migration % already applied. skipping.', _migration_name;
        if _migration.body operator(pg_catalog.!=) _migration_body then
            raise warning 'the contents of migration "%" have changed', _migration_name;
        end if;
        return;
    end if;
    -- wrap the migration body in its own anonymous block, tagged with the migration name
    _sql = pg_catalog.format(E'do /*%s*/ $migration_body$\nbegin\n%s\nend;\n$migration_body$;', _migration_name, _migration_body);
    execute _sql;
    -- record the migration so that subsequent runs take the "already applied" branch above
    insert into ai.pgai_lib_migration ("name", body, applied_at_version)
    values (_migration_name, _migration_body, $version$__version__$version$);
end;
$outer_migration_block$;
1265+
12271266-- ------------------------------------------------------------------------------
12281267-- 001-chunking.sql
12291268
@@ -2704,8 +2743,8 @@ begin
27042743 create table %I.%I
27052744 ( %s
27062745 , queued_at pg_catalog .timestamptz not null default now()
2707- , loading_retries pg_catalog .int4 not null default 0
2708- , loading_retry_after pg_catalog .timestamptz
2746+ , attempts pg_catalog .int4 not null default 0
2747+ , retry_after pg_catalog .timestamptz
27092748 )
27102749 $sql$
27112750 , queue_schema, queue_table
@@ -3548,6 +3587,85 @@ language plpgsql volatile security invoker
35483587set search_path to pg_catalog, pg_temp
35493588;
35503589
-- -----------------------------------------------------------------------------
-- _get_next_queue_batch
--
-- Scans a vectorizer queue table and returns up to `batch_size` queue rows for
-- which this session managed to take a session-level advisory lock.
--
-- Parameters:
--   queue_table  the queue table to read from; it must be the queue table of a
--                row in ai.vectorizer
--   batch_size   maximum number of rows (= locks acquired) to return
--
-- Returns:
--   setof record: each row is (_ctid, _lock_id, <all queue table columns>)
--
-- Locking:
--   The lock is the two-key advisory lock (queue table oid, hash of pk values),
--   taken with pg_try_advisory_lock. This function never releases the locks it
--   takes; the caller is responsible for that.
--
-- Raises:
--   an exception if no vectorizer is registered for `queue_table`
create or replace function ai._get_next_queue_batch(
  queue_table pg_catalog.regclass,
  batch_size pg_catalog.int4
) returns setof record as $$
declare
    source_pk pg_catalog.jsonb;
    lock_id_string pg_catalog.text;
    query pg_catalog.text;
    lock_count pg_catalog.int4 := 0;
    row record;
begin
    -- get the source_pk for this queue table
    select v.source_pk
    into source_pk
    from ai.vectorizer v
    where pg_catalog.to_regclass(pg_catalog.format('%I.%I', v.queue_schema, v.queue_table)) operator(pg_catalog.=) _get_next_queue_batch.queue_table;

    -- Without a source_pk the generated lock expression below would be
    -- syntactically invalid; fail early with a message that names the cause.
    if source_pk is null then
        raise exception 'no vectorizer found for queue table %', _get_next_queue_batch.queue_table;
    end if;

    -- construct the "lock id string"
    -- this is a string of all pk column names and their values, e.g. for a
    -- two-column pk consisting of 'time' and 'url' this will generate:
    -- hashtext(format('time|%s|url|%s', q."time", q.url))
    --
    -- * the label string is embedded with %L so that quotes in a column name
    --   cannot break out of the literal, and any '%' in a column name is doubled
    --   so the inner format() treats it as text
    -- * the column references are quoted with %I (and qualified with the queue
    --   table alias `q`) so mixed-case or reserved-word column names resolve
    -- for ordinary column names the hashed text is unchanged
    select pg_catalog.format($fmt$pg_catalog.hashtext(pg_catalog.format(%L, %s))$fmt$, format_string, format_args)
    into lock_id_string
    from (
        select
            pg_catalog.string_agg(pg_catalog.format('%s|%%s', pg_catalog.replace(attname, '%', '%%')), '|' order by attnum) as format_string
          , pg_catalog.string_agg(pg_catalog.format('q.%I', attname), ', ' order by attnum) as format_args
        from pg_catalog.jsonb_to_recordset(source_pk) as (attnum int, attname text)
    ) as _;

    -- scratch table of lock ids already tried during this call, so that a second
    -- queue row with the same lock id is skipped without another lock attempt
    -- TODO: for very small batch sizes (<10), an array _may_ be faster
    create temporary table seen_lock_ids (lock_id bigint);
    create index on seen_lock_ids (lock_id);

    -- construct the query returning every queue row that is due for processing
    -- (no retry_after, or retry_after already passed) and whose lock is not
    -- already held by this backend
    query := pg_catalog.format($sql$
        select
          q.ctid as _ctid
        , %s as _lock_id
        , q.*
        from %s as q
        where (q.retry_after is null or q.retry_after <= now())
        and %s not in (
            -- exclude all locks that we already hold
            select objid::int
            from pg_locks
            where locktype = 'advisory'
            and pid = pg_catalog.pg_backend_pid()
            and classid = %s
        )
    $sql$
    , lock_id_string
    , _get_next_queue_batch.queue_table
    , lock_id_string
    , _get_next_queue_batch.queue_table::pg_catalog.oid
    );

    for row in execute query
    loop
        -- stop as soon as the batch is full
        exit when lock_count operator(pg_catalog.>=) batch_size;

        -- skip lock ids we have already attempted during this call
        if exists(select 1 from pg_temp.seen_lock_ids where lock_id operator(pg_catalog.=) row._lock_id) then
            continue;
        end if;

        insert into pg_temp.seen_lock_ids (lock_id) values (row._lock_id);

        -- non-blocking attempt: rows whose lock cannot be taken are silently skipped
        if pg_catalog.pg_try_advisory_lock(_get_next_queue_batch.queue_table::pg_catalog.oid::int, row._lock_id) then
            lock_count := lock_count operator(pg_catalog.+) 1;
            return next row;
        end if;
    end loop;

    drop table pg_temp.seen_lock_ids;

    return;
end;
$$ language plpgsql volatile security invoker
set search_path to pg_catalog, pg_temp
;
3667+
3668+
35513669-- ------------------------------------------------------------------------------
35523670-- 012-vectorizer-api.sql
35533671-- -----------------------------------------------------------------------------
0 commit comments