Skip to content

Commit ab6cae4

Browse files
authored
Test PgBouncer against our code-level partitioning logic (#3178)
* test partition drops with pgbouncer * dedicated DDLPool to bypass pgbouncer * fix generate * add env var * min env var * update documentation * no go mod redundancy
1 parent f36cc54 commit ab6cae4

File tree

14 files changed

+403
-471
lines changed

14 files changed

+403
-471
lines changed

examples/go/guides/go.mod

Lines changed: 0 additions & 80 deletions
This file was deleted.

frontend/docs/pages/self-hosting/configuration-options.mdx

Lines changed: 35 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ The Hatchet server and engine can be configured via environment variables using
77
Hatchet uses the following environment variable prefixes:
88

99
- **`SERVER_`** (173 variables) - Main server configuration including runtime, authentication, encryption, monitoring, and integrations
10-
- **`DATABASE_`** (15 variables) - PostgreSQL database connection and configuration
10+
- **`DATABASE_`** (19 variables) - PostgreSQL database connection and configuration
1111
- **`READ_REPLICA_`** (4 variables) - Read replica database configuration
1212
- **`ADMIN_`** (3 variables) - Administrator user setup for initial seeding
1313
- **`DEFAULT_`** (3 variables) - Default tenant configuration
@@ -140,36 +140,40 @@ Variables marked with ⚠️ are conditionally required when specific features a
140140

141141
## Database Configuration
142142

143-
| Variable | Description | Default Value |
144-
| ----------------------------- | ----------------------------------------------------- | ------------------- |
145-
| `DATABASE_URL` | PostgreSQL connection string | `127.0.0.1` |
146-
| `DATABASE_POSTGRES_HOST` | PostgreSQL host | `127.0.0.1` |
147-
| `DATABASE_POSTGRES_PORT` | PostgreSQL port | `5431` |
148-
| `DATABASE_POSTGRES_USERNAME` | PostgreSQL username | `hatchet` |
149-
| `DATABASE_POSTGRES_PASSWORD` | PostgreSQL password | `hatchet` |
150-
| `DATABASE_POSTGRES_DB_NAME` | PostgreSQL database name | `hatchet` |
151-
| `DATABASE_POSTGRES_SSL_MODE` | PostgreSQL SSL mode | `disable` |
152-
| `DATABASE_MAX_CONNS` | Max database connections | `50` |
153-
| `DATABASE_MIN_CONNS` | Min database connections | `10` |
154-
| `DATABASE_MAX_QUEUE_CONNS` | Max queue connections | `50` |
155-
| `DATABASE_MIN_QUEUE_CONNS` | Min queue connections | `10` |
156-
| `DATABASE_MAX_CONN_LIFETIME` | Max lifetime of a connection | `15m` |
157-
| `DATABASE_MAX_CONN_IDLE_TIME` | Max time a connection can be idle before being closed | `1m` |
158-
| `DATABASE_LOG_QUERIES` | Log database queries | `false` |
159-
| `CACHE_DURATION` | Cache duration | `5s` |
160-
| `ADMIN_EMAIL` | Admin email for seeding | `admin@example.com` |
161-
| `ADMIN_PASSWORD` | Admin password for seeding | `Admin123!!` |
162-
| `ADMIN_NAME` | Admin name for seeding | `Admin` |
163-
| `DEFAULT_TENANT_NAME` | Default tenant name | `Default` |
164-
| `DEFAULT_TENANT_SLUG` | Default tenant slug | `default` |
165-
| `DEFAULT_TENANT_ID` | Default tenant ID | |
166-
| `SEED_DEVELOPMENT` | Development seeding flag | |
167-
| `READ_REPLICA_ENABLED` | Enable read replica | `false` |
168-
| `READ_REPLICA_DATABASE_URL` | Read replica database URL | |
169-
| `READ_REPLICA_MAX_CONNS` | Read replica max connections | `50` |
170-
| `READ_REPLICA_MIN_CONNS` | Read replica min connections | `10` |
171-
| `DATABASE_LOGGER_LEVEL` | Database logger level | |
172-
| `DATABASE_LOGGER_FORMAT` | Database logger format | |
143+
| Variable | Description | Default Value |
144+
| ----------------------------- | -------------------------------------------------------------------------- | ------------------- |
145+
| `DATABASE_URL` | PostgreSQL connection string | `127.0.0.1` |
146+
| `DATABASE_POSTGRES_HOST` | PostgreSQL host | `127.0.0.1` |
147+
| `DATABASE_POSTGRES_PORT` | PostgreSQL port | `5431` |
148+
| `DATABASE_POSTGRES_USERNAME` | PostgreSQL username | `hatchet` |
149+
| `DATABASE_POSTGRES_PASSWORD` | PostgreSQL password | `hatchet` |
150+
| `DATABASE_POSTGRES_DB_NAME` | PostgreSQL database name | `hatchet` |
151+
| `DATABASE_POSTGRES_SSL_MODE` | PostgreSQL SSL mode | `disable` |
152+
| `DATABASE_MAX_CONNS` | Max database connections | `50` |
153+
| `DATABASE_MIN_CONNS` | Min database connections | `10` |
154+
| `DATABASE_MAX_QUEUE_CONNS` | Max queue connections | `50` |
155+
| `DATABASE_MIN_QUEUE_CONNS` | Min queue connections | `10` |
156+
| `DATABASE_MAX_CONN_LIFETIME` | Max lifetime of a connection | `15m` |
157+
| `DATABASE_MAX_CONN_IDLE_TIME` | Max time a connection can be idle before being closed | `1m` |
158+
| `DATABASE_LOG_QUERIES` | Log database queries | `false` |
159+
| `DATABASE_PGBOUNCER_ENABLED` | Enable pgbouncer support; requires `DATABASE_DIRECT_URL` to be set | `false` |
160+
| `DATABASE_DIRECT_URL` | Direct PostgreSQL connection string bypassing pgbouncer for DDL operations | |
161+
| `DATABASE_DIRECT_MAX_CONNS` | Max connections for the direct (non-pgbouncer) pool | `2` |
162+
| `DATABASE_DIRECT_MIN_CONNS` | Min connections for the direct (non-pgbouncer) pool | `1` |
163+
| `CACHE_DURATION` | Cache duration | `5s` |
164+
| `ADMIN_EMAIL` | Admin email for seeding | `admin@example.com` |
165+
| `ADMIN_PASSWORD` | Admin password for seeding | `Admin123!!` |
166+
| `ADMIN_NAME` | Admin name for seeding | `Admin` |
167+
| `DEFAULT_TENANT_NAME` | Default tenant name | `Default` |
168+
| `DEFAULT_TENANT_SLUG` | Default tenant slug | `default` |
169+
| `DEFAULT_TENANT_ID` | Default tenant ID | |
170+
| `SEED_DEVELOPMENT` | Development seeding flag | |
171+
| `READ_REPLICA_ENABLED` | Enable read replica | `false` |
172+
| `READ_REPLICA_DATABASE_URL` | Read replica database URL | |
173+
| `READ_REPLICA_MAX_CONNS` | Read replica max connections | `50` |
174+
| `READ_REPLICA_MIN_CONNS` | Read replica min connections | `10` |
175+
| `DATABASE_LOGGER_LEVEL` | Database logger level | |
176+
| `DATABASE_LOGGER_FORMAT` | Database logger format | |
173177

174178
## Security Check Configuration
175179

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ require (
3838
github.com/pingcap/errors v0.11.4
3939
github.com/posthog/posthog-go v1.10.0
4040
github.com/pressly/goose/v3 v3.27.0
41+
github.com/sashabaranov/go-openai v1.41.2
4142
github.com/sethvargo/go-retry v0.3.0
4243
github.com/spf13/cobra v1.10.2
4344
github.com/spf13/viper v1.21.0

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,8 @@ github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6
407407
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
408408
github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc=
409409
github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik=
410+
github.com/sashabaranov/go-openai v1.41.2 h1:vfPRBZNMpnqu8ELsclWcAvF19lDNgh1t6TVfFFOPiSM=
411+
github.com/sashabaranov/go-openai v1.41.2/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
410412
github.com/sethvargo/go-retry v0.3.0 h1:EEt31A35QhrcRZtrYFDTBg91cqZVnFL2navjDrah2SE=
411413
github.com/sethvargo/go-retry v0.3.0/go.mod h1:mNX17F0C/HguQMyMyJxcnU471gOZGxCLyYaFyAZraas=
412414
github.com/shirou/gopsutil/v4 v4.25.7 h1:bNb2JuqKuAu3tRlPv5piSmBZyMfecwQ+t/ILq+1JqVM=

pkg/config/database/config.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,18 @@ type ConfigFile struct {
2929
MaxQueueConns int `mapstructure:"maxQueueConns" json:"maxQueueConns,omitempty" default:"50"`
3030
MinQueueConns int `mapstructure:"minQueueConns" json:"minQueueConns,omitempty" default:"10"`
3131

32+
// PgBouncerEnabled indicates that the main DATABASE_URL connects through pgbouncer.
33+
// When true, DATABASE_DIRECT_URL must also be set so that DDL operations like
34+
// DETACH PARTITION CONCURRENTLY can bypass pgbouncer.
35+
PgBouncerEnabled bool `mapstructure:"pgbouncerEnabled" json:"pgbouncerEnabled,omitempty" default:"false"`
36+
37+
// DirectDatabaseURL is a connection string that bypasses pgbouncer and connects directly
38+
// to PostgreSQL. This is used for DDL operations like DETACH PARTITION CONCURRENTLY that
39+
// cannot run inside a transaction block. Required when PgBouncerEnabled is true.
40+
DirectDatabaseURL string `mapstructure:"directDatabaseUrl" json:"directDatabaseUrl,omitempty" default:""`
41+
DirectDatabaseMaxConns int `mapstructure:"directDatabaseMaxConns" json:"directDatabaseMaxConns,omitempty" default:"2"`
42+
DirectDatabaseMinConns int `mapstructure:"directDatabaseMinConns" json:"directDatabaseMinConns,omitempty" default:"1"`
43+
3244
MaxConnLifetime time.Duration `mapstructure:"maxConnLifetime" json:"maxConnLifetime,omitempty" default:"15m"`
3345
MaxConnIdleTime time.Duration `mapstructure:"maxConnIdleTime" json:"maxConnIdleTime,omitempty" default:"1m"`
3446

@@ -67,6 +79,11 @@ type Layer struct {
6779

6880
QueuePool *pgxpool.Pool
6981

82+
// DirectPool is a small pool (max 2 connections) that bypasses pgbouncer and connects
83+
// directly to PostgreSQL for DDL operations like DETACH PARTITION CONCURRENTLY.
84+
// If pgbouncer is not used, this may be nil and callers should fall back to Pool.
85+
DirectPool *pgxpool.Pool
86+
7087
V1 v1.Repository
7188

7289
Seed SeedConfigFile
@@ -87,6 +104,11 @@ func BindAllEnv(v *viper.Viper) {
87104
_ = v.BindEnv("maxConnLifetime", "DATABASE_MAX_CONN_LIFETIME")
88105
_ = v.BindEnv("maxConnIdleTime", "DATABASE_MAX_CONN_IDLE_TIME")
89106

107+
_ = v.BindEnv("pgbouncerEnabled", "DATABASE_PGBOUNCER_ENABLED")
108+
_ = v.BindEnv("directDatabaseUrl", "DATABASE_DIRECT_URL")
109+
_ = v.BindEnv("directDatabaseMaxConns", "DATABASE_DIRECT_MAX_CONNS")
110+
_ = v.BindEnv("directDatabaseMinConns", "DATABASE_DIRECT_MIN_CONNS")
111+
90112
_ = v.BindEnv("readReplicaEnabled", "READ_REPLICA_ENABLED")
91113
_ = v.BindEnv("readReplicaDatabaseUrl", "READ_REPLICA_DATABASE_URL")
92114
_ = v.BindEnv("readReplicaMaxConns", "READ_REPLICA_MAX_CONNS")

pkg/config/loader/loader.go

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,40 @@ func (c *ConfigLoader) InitDataLayer() (res *database.Layer, err error) {
263263
}
264264
}
265265

266+
// A small direct pool for DDL operations that cannot go through pgbouncer
267+
// (e.g. DETACH PARTITION CONCURRENTLY which cannot run inside a transaction block).
268+
var directPool *pgxpool.Pool
269+
270+
if cf.PgBouncerEnabled && cf.DirectDatabaseURL == "" {
271+
return nil, fmt.Errorf("DATABASE_PGBOUNCER_ENABLED is set but DATABASE_DIRECT_URL is not; " +
272+
"a direct PostgreSQL connection is required for DDL operations like DETACH PARTITION CONCURRENTLY")
273+
}
274+
275+
if cf.DirectDatabaseURL != "" {
276+
directConfig, err := pgxpool.ParseConfig(cf.DirectDatabaseURL)
277+
if err != nil {
278+
return nil, fmt.Errorf("could not parse direct database url: %w", err)
279+
}
280+
281+
if cf.DirectDatabaseMaxConns != 0 {
282+
directConfig.MaxConns = int32(cf.DirectDatabaseMaxConns) // nolint: gosec
283+
}
284+
285+
if cf.DirectDatabaseMinConns != 0 {
286+
directConfig.MinConns = int32(cf.DirectDatabaseMinConns) // nolint: gosec
287+
}
288+
289+
directConfig.MaxConnLifetime = cf.MaxConnLifetime
290+
directConfig.MaxConnIdleTime = cf.MaxConnIdleTime
291+
directConfig.AfterConnect = pgxpoolConnAfterConnect
292+
directConfig.ConnConfig.Tracer = otelpgx.NewTracer()
293+
294+
directPool, err = pgxpool.NewWithConfig(context.Background(), directConfig)
295+
if err != nil {
296+
return nil, fmt.Errorf("could not connect to direct database: %w", err)
297+
}
298+
}
299+
266300
ch := cache.New(cf.CacheDuration)
267301

268302
retentionPeriod, err := time.ParseDuration(scf.Runtime.Limits.DefaultTenantRetentionPeriod)
@@ -305,6 +339,7 @@ func (c *ConfigLoader) InitDataLayer() (res *database.Layer, err error) {
305339

306340
v1, cleanupV1 := repov1.NewRepository(
307341
pool,
342+
directPool,
308343
&l,
309344
cf.CacheDuration,
310345
retentionPeriod,
@@ -328,10 +363,11 @@ func (c *ConfigLoader) InitDataLayer() (res *database.Layer, err error) {
328363

329364
return cleanupV1()
330365
},
331-
Pool: pool,
332-
QueuePool: pool,
333-
V1: v1,
334-
Seed: cf.Seed,
366+
Pool: pool,
367+
QueuePool: pool,
368+
DirectPool: directPool,
369+
V1: v1,
370+
Seed: cf.Seed,
335371
}, nil
336372

337373
}

pkg/repository/olap.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ func NewOLAPRepositoryFromPool(
309309
) (OLAPRepository, func() error) {
310310
v := validator.NewDefaultValidator()
311311

312-
shared, cleanupShared := newSharedRepository(pool, v, l, payloadStoreOpts, tenantLimitConfig, enforceLimits, cacheDuration, enableDurableUserEventLog)
312+
shared, cleanupShared := newSharedRepository(pool, nil, v, l, payloadStoreOpts, tenantLimitConfig, enforceLimits, cacheDuration, enableDurableUserEventLog)
313313

314314
return newOLAPRepository(shared, olapRetentionPeriod, shouldPartitionEventsTables, statusUpdateBatchSizeLimits), cleanupShared
315315
}
@@ -400,10 +400,14 @@ func (r *OLAPRepositoryImpl) UpdateTablePartitions(ctx context.Context) error {
400400
r.l.Warn().Msgf("removing partitions before %s using retention period of %s", removeBefore.Format(time.RFC3339), r.olapRetentionPeriod)
401401
}
402402

403+
// Use the direct pool (bypasses pgbouncer) for DDL operations because
404+
// DETACH PARTITION CONCURRENTLY cannot run inside a transaction block.
405+
ddlPool := r.DDLPool()
406+
403407
for _, partition := range partitions {
404408
r.l.Debug().Msgf("detaching partition %s", partition.PartitionName)
405409

406-
conn, release, err := sqlchelpers.AcquireConnectionWithStatementTimeout(ctx, r.pool, r.l, 30*60*1000) // 30 minutes
410+
conn, release, err := sqlchelpers.AcquireConnectionWithStatementTimeout(ctx, ddlPool, r.l, 30*60*1000) // 30 minutes
407411

408412
if err != nil {
409413
return err

pkg/repository/repository.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ type repositoryImpl struct {
9292

9393
func NewRepository(
9494
pool *pgxpool.Pool,
95+
directPool *pgxpool.Pool,
9596
l *zerolog.Logger,
9697
cacheDuration time.Duration,
9798
taskRetentionPeriod, olapRetentionPeriod time.Duration,
@@ -105,7 +106,7 @@ func NewRepository(
105106
) (Repository, func() error) {
106107
v := validator.NewDefaultValidator()
107108

108-
shared, cleanupShared := newSharedRepository(pool, v, l, payloadStoreOpts, tenantLimitConfig, enforceLimits, cacheDuration, enableDurableUserEventLog)
109+
shared, cleanupShared := newSharedRepository(pool, directPool, v, l, payloadStoreOpts, tenantLimitConfig, enforceLimits, cacheDuration, enableDurableUserEventLog)
109110

110111
mq, cleanupMq := newMessageQueueRepository(shared)
111112

0 commit comments

Comments
 (0)