Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/build-cloudberry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ jobs:
{"test":"ic-isolation2",
"make_configs":["src/test/isolation2:installcheck-isolation2"]
},
{"test":"ic-isolation2-hot-standby",
"make_configs":["src/test/isolation2:installcheck-hot-standby"]
},
{"test":"ic-isolation2-crash",
"make_configs":["src/test/isolation2:installcheck-isolation2-crash"],
"enable_core_check":false
Expand Down
10 changes: 10 additions & 0 deletions src/backend/access/transam/xlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ int XLogArchiveTimeout = 0;
int XLogArchiveMode = ARCHIVE_MODE_OFF;
char *XLogArchiveCommand = NULL;
bool EnableHotStandby = false;
bool EnableHotDR = false;
bool fullPageWrites = true;
bool wal_log_hints = false;
bool wal_compression = false;
Expand Down Expand Up @@ -7967,6 +7968,12 @@ StartupXLOG(void)
if (gp_pause_on_restore_point_replay)
pauseRecoveryOnRestorePoint(xlogreader);

/* Exit the recovery loop if a promotion is triggered in pauseRecoveryOnRestorePoint() */
if (reachedContinuousRecoveryTarget && recoveryTargetAction == RECOVERY_TARGET_ACTION_PROMOTE){
reachedRecoveryTarget = true;
break;
}

/* Exit loop if we reached inclusive recovery target */
if (recoveryStopsAfter(xlogreader))
{
Expand Down Expand Up @@ -10757,6 +10764,9 @@ XLogRestorePoint(const char *rpName)
xlrec.rp_time = GetCurrentTimestamp();
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);

/* Log a standby snapshot just before the restore point record is inserted */
LogStandbySnapshot();

XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));

Expand Down
40 changes: 32 additions & 8 deletions src/backend/cdb/cdbutil.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ static int CdbComponentDatabaseInfoCompare(const void *p1, const void *p2);

static GpSegConfigEntry * readGpSegConfigFromCatalog(int *total_dbs);
static GpSegConfigEntry * readGpSegConfigFromFTSFiles(int *total_dbs);
static GpSegConfigEntry * readGpSegConfigFromFiles(int *total_dbs);

static void getAddressesForDBid(GpSegConfigEntry *c, int elevel);
static HTAB *hostPrimaryCountHashTableInit(void);
Expand Down Expand Up @@ -131,6 +132,15 @@ typedef struct HostPrimaryCountEntry
*/
static GpSegConfigEntry *
readGpSegConfigFromFTSFiles(int *total_dbs)
{
	GpSegConfigEntry *entries;

	/* Callers must be outside a transaction and not on a hot DR cluster. */
	Assert(!IsTransactionState() && !IS_HOT_DR_CLUSTER());

	/*
	 * Notify FTS and wait for it to finish a probe, so that the dump file
	 * is updated before it is read below.
	 */
	FtsNotifyProber();

	/* The actual file parsing is shared with the non-FTS reader. */
	entries = readGpSegConfigFromFiles(total_dbs);

	return entries;
}

static GpSegConfigEntry *
readGpSegConfigFromFiles(int *total_dbs)
{
FILE *fd;
int idx = 0;
Expand All @@ -142,11 +152,6 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
char address[MAXHOSTNAMELEN];
char buf[MAXHOSTNAMELEN * 2 + 32];

Assert(!IsTransactionState());

/* notify and wait FTS to finish a probe and update the dump file */
FtsNotifyProber();

fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");

if (!fd)
Expand Down Expand Up @@ -188,6 +193,18 @@ readGpSegConfigFromFTSFiles(int *total_dbs)
return configs;
}

/*
 * checkGpSegConfigFtsFiles() reports whether GPSEGCONFIGDUMPFILE can be
 * opened for reading.
 *
 * Returns true if the file was opened (it is closed again before returning),
 * false if AllocateFile() failed.  The file contents are not examined.
 *
 * NOTE(review): any open failure is reported as "false"; the reason for the
 * failure is not distinguished here.
 */
bool
checkGpSegConfigFtsFiles(void)
{
	FILE	   *fd = AllocateFile(GPSEGCONFIGDUMPFILE, "r");

	if (fd == NULL)
		return false;

	/* Only openability is being probed, so release the handle right away. */
	FreeFile(fd);

	return true;
}

/*
* writeGpSegConfigToFTSFiles() dump gp_segment_configuration to the file
* GPSEGCONFIGDUMPFILE, in $PGDATA, only FTS process can use this function.
Expand Down Expand Up @@ -372,10 +389,17 @@ getCdbComponentInfo(void)

HTAB *hostPrimaryCountHash = hostPrimaryCountHashTableInit();

if (IsTransactionState())
configs = readGpSegConfigFromCatalog(&total_dbs);
if (EnableHotDR)
{
configs = readGpSegConfigFromFiles(&total_dbs);
}
else
configs = readGpSegConfigFromFTSFiles(&total_dbs);
{
if (IsTransactionState())
configs = readGpSegConfigFromCatalog(&total_dbs);
else
configs = readGpSegConfigFromFTSFiles(&total_dbs);
}

component_databases = palloc0(sizeof(CdbComponentDatabases));

Expand Down
27 changes: 27 additions & 0 deletions src/backend/utils/misc/guc_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ static bool check_optimizer(bool *newval, void **extra, GucSource source);
static bool check_verify_gpfdists_cert(bool *newval, void **extra, GucSource source);
static bool check_dispatch_log_stats(bool *newval, void **extra, GucSource source);
static bool check_gp_workfile_compression(bool *newval, void **extra, GucSource source);
static bool check_hot_dr(bool *newval, void **extra, GucSource source);

/* Helper function for guc setter */
bool gpvars_check_gp_resqueue_priority_default_value(char **newval,
Expand Down Expand Up @@ -3331,6 +3332,16 @@ struct config_bool ConfigureNamesBool_gp[] =
NULL, NULL, NULL
},

{
{"hot_dr", PGC_POSTMASTER, REPLICATION_STANDBY,
gettext_noop("DR Cluster as well as allows connteions and queries"),
NULL
},
&EnableHotDR,
false,
check_hot_dr, NULL, NULL
},

{
{"gp_enable_runtime_filter_pushdown", PGC_USERSET, DEVELOPER_OPTIONS,
gettext_noop("Try to push the hash table of hash join to the seqscan or AM as bloom filter."),
Expand Down Expand Up @@ -5455,6 +5466,22 @@ check_verify_gpfdists_cert(bool *newval, void **extra, GucSource source)
return true;
}

/*
 * GUC check hook for "hot_dr".
 *
 * Enabling hot_dr is rejected when either:
 *   - hot_standby (EnableHotStandby) is off, or
 *   - this node is the query dispatcher and checkGpSegConfigFtsFiles()
 *     reports that the segment configuration file is not available.
 *
 * Disabling the setting is always accepted.  Returns true when the new value
 * is acceptable; rejections are raised through ereport(ERROR).
 *
 * NOTE(review): this hook reads EnableHotStandby, which is another GUC;
 * confirm that hot_standby is guaranteed to be assigned before this runs.
 * NOTE(review): check hooks conventionally report failure with
 * GUC_check_errmsg() plus "return false" instead of ereport(ERROR);
 * consider converting after confirming against utils/guc.h.
 */
static bool
check_hot_dr(bool *newval, void **extra, GucSource source)
{
	/* Nothing to validate when the feature is being turned off. */
	if (!*newval)
		return true;

	if (!EnableHotStandby)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot enable \"hot_dr\" when \"hot_standby\" is false")));
	}

	/* Only the query dispatcher needs the segment configuration file. */
	if (IS_QUERY_DISPATCHER() && !checkGpSegConfigFtsFiles())
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("cannot enable \"hot_dr\" since DR cluster segment configuration file does not exist")));
	}

	return true;
}

static bool
check_dispatch_log_stats(bool *newval, void **extra, GucSource source)
{
Expand Down
1 change: 1 addition & 0 deletions src/include/access/xlog.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ extern int XLogArchiveTimeout;
extern int wal_retrieve_retry_interval;
extern char *XLogArchiveCommand;
extern bool EnableHotStandby;
extern bool EnableHotDR;

extern bool fullPageWrites;
extern bool wal_log_hints;
Expand Down
1 change: 1 addition & 0 deletions src/include/cdb/cdbutil.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ extern char *getDnsAddress(char *name, int port, int elevel);

#ifdef USE_INTERNAL_FTS
extern void writeGpSegConfigToFTSFiles(void);
extern bool checkGpSegConfigFtsFiles(void);
#else

GpSegConfigEntry * readGpSegConfig(char * buff, int *total_dbs);
Expand Down
1 change: 1 addition & 0 deletions src/include/cdb/cdbvars.h
Original file line number Diff line number Diff line change
Expand Up @@ -757,6 +757,7 @@ extern GpId GpIdentity;
#define MAX_DBID_STRING_LENGTH 11

#define UNINITIALIZED_GP_IDENTITY_VALUE (-10000)
#define IS_HOT_DR_CLUSTER() (EnableHotDR)
#define IS_QUERY_DISPATCHER() (GpIdentity.segindex == MASTER_CONTENT_ID)
#define IS_HOT_STANDBY_QD() (EnableHotStandby && IS_QUERY_DISPATCHER() && RecoveryInProgress())

Expand Down
1 change: 1 addition & 0 deletions src/include/utils/unsync_guc_name.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@
"gp_workfile_limit_per_segment",
"gp_workfile_max_entries",
"hba_file",
"hot_dr",
"hot_standby",
"hot_standby_feedback",
"huge_pages",
Expand Down
3 changes: 2 additions & 1 deletion src/test/isolation2/expected/hot_standby/faults.out
Original file line number Diff line number Diff line change
Expand Up @@ -133,14 +133,15 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_
ERROR: primary segments can only process MPP protocol messages from primary QD (seg1 slice1 127.0.1.1:7006 pid=14671)
HINT: Exit the current session and re-connect.
-1Sq: ... <quitting>

-- start_ignore
-- will fail due to downed mirror (previous primary)
-1S: select * from hs_failover;
ERROR: failed to acquire resources on one or more segments
DETAIL: connection to server at "10.13.9.74", port 7003 failed: Connection refused
Is the server running on that host and accepting TCP/IP connections?
(seg1 10.13.9.74:7003)
-1Sq: ... <quitting>
-- end_ignore

-- bring the downed mirror up
!\retcode gprecoverseg -aF;
Expand Down
3 changes: 2 additions & 1 deletion src/test/isolation2/sql/hot_standby/faults.sql
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,11 @@ select gp_inject_fault('out_of_recovery_in_startupxlog', 'reset', dbid) from gp_
-- in an existing gang. That mirror is now a primary, so it will complain and the query fails.
-1S: select * from hs_failover;
-1Sq:

-- start_ignore
-- will fail due to downed mirror (previous primary)
-1S: select * from hs_failover;
-1Sq:
-- end_ignore

-- bring the downed mirror up
!\retcode gprecoverseg -aF;
Expand Down
Loading