Skip to content

Commit a596316

Browse files
committed
[Issue #310] Detect timeline switch via repprotocol
1 parent 4ab117b commit a596316

File tree

6 files changed

+217
-9
lines changed

6 files changed

+217
-9
lines changed

src/backup.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -165,8 +165,10 @@ do_backup_instance(PGconn *backup_conn, PGNodeInfo *nodeInfo, bool no_sync, bool
165165
"trying to look up on previous timelines",
166166
current.tli);
167167

168-
/* TODO: use read_timeline_history */
169-
tli_list = catalog_get_timelines(&instance_config);
168+
tli_list = get_history_streaming(&instance_config.conn_opt, current.tli, backup_list);
169+
if (!tli_list)
170+
/* fallback to using archive */
171+
tli_list = catalog_get_timelines(&instance_config);
170172

171173
if (parray_num(tli_list) == 0)
172174
elog(WARNING, "Cannot find valid backup on previous timelines, "

src/catalog.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ typedef struct LockInfo
4848
bool exclusive;
4949
} LockInfo;
5050

51-
static timelineInfo *
51+
timelineInfo *
5252
timelineInfoNew(TimeLineID tli)
5353
{
5454
timelineInfo *tlinfo = (timelineInfo *) pgut_malloc(sizeof(timelineInfo));
@@ -74,7 +74,8 @@ timelineInfoFree(void *tliInfo)
7474

7575
if (tli->backups)
7676
{
77-
parray_walk(tli->backups, pgBackupFree);
77+
/* the backups themselves should be freed separately */
78+
// parray_walk(tli->backups, pgBackupFree);
7879
parray_free(tli->backups);
7980
}
8081

src/pg_probackup.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -891,6 +891,8 @@ extern int validate_one_page(Page page, BlockNumber absolute_blkno,
891891
uint32 checksum_version);
892892
extern bool validate_tablespace_map(pgBackup *backup);
893893

894+
extern parray* get_history_streaming(ConnectionOptions *conn_opt, TimeLineID tli, parray *backup_list);
895+
894896
/* return codes for validate_one_page */
895897
/* TODO: use enum */
896898
#define PAGE_IS_VALID (-1)
@@ -920,6 +922,7 @@ extern pgBackup *catalog_get_last_data_backup(parray *backup_list,
920922
extern pgBackup *get_multi_timeline_parent(parray *backup_list, parray *tli_list,
921923
TimeLineID current_tli, time_t current_start_time,
922924
InstanceConfig *instance);
925+
extern timelineInfo *timelineInfoNew(TimeLineID tli);
923926
extern void timelineInfoFree(void *tliInfo);
924927
extern parray *catalog_get_timelines(InstanceConfig *instance);
925928
extern void do_set_backup(const char *instance_name, time_t backup_id,

src/stream.c

Lines changed: 202 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "pg_probackup.h"
1111
#include "receivelog.h"
1212
#include "streamutil.h"
13+
#include "access/timeline.h"
1314

1415
#include <time.h>
1516
#include <unistd.h>
@@ -69,6 +70,7 @@ static void add_walsegment_to_filelist(parray *filelist, uint32 timeline,
6970
uint32 xlog_seg_size);
7071
static void add_history_file_to_filelist(parray *filelist, uint32 timeline,
7172
char *basedir);
73+
static parray* parse_tli_history_buffer(char *history, TimeLineID tli);
7274

7375
/*
7476
* Run IDENTIFY_SYSTEM through a given connection and
@@ -353,6 +355,204 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished)
353355

354356
/* --- External API --- */
355357

358+
/*
359+
* Maybe add a StreamOptions struct ?
360+
* Backup conn only needed to calculate stream_stop_timeout. Think about refactoring it.
361+
*/
362+
parray*
363+
get_history_streaming(ConnectionOptions *conn_opt, TimeLineID tli, parray *backup_list)
364+
{
365+
PGresult *res;
366+
PGconn *conn;
367+
char *history;
368+
char query[128];
369+
parray *result = NULL;
370+
parray *tli_list = NULL;
371+
timelineInfo *tlinfo = NULL;
372+
int i,j;
373+
374+
snprintf(query, sizeof(query), "TIMELINE_HISTORY %u", tli);
375+
376+
/*
377+
* Connect in replication mode to the server.
378+
*/
379+
conn = pgut_connect_replication(conn_opt->pghost,
380+
conn_opt->pgport,
381+
conn_opt->pgdatabase,
382+
conn_opt->pguser,
383+
false);
384+
385+
if (!conn)
386+
return NULL;
387+
388+
res = PQexec(conn, query);
389+
PQfinish(conn);
390+
391+
if (PQresultStatus(res) != PGRES_TUPLES_OK)
392+
{
393+
elog(WARNING, "Could not send replication command \"%s\": %s",
394+
query, PQresultErrorMessage(res));
395+
PQclear(res);
396+
return NULL;
397+
}
398+
399+
/*
400+
* The response to TIMELINE_HISTORY is a single row result set
401+
* with two fields: filename and content
402+
*/
403+
404+
if (PQnfields(res) != 2 || PQntuples(res) != 1)
405+
{
406+
elog(WARNING, "Unexpected response to TIMELINE_HISTORY command: "
407+
"got %d rows and %d fields, expected %d rows and %d fields",
408+
PQntuples(res), PQnfields(res), 1, 2);
409+
PQclear(res);
410+
return NULL;
411+
}
412+
413+
history = pgut_strdup(PQgetvalue(res, 0, 1));
414+
result = parse_tli_history_buffer(history, tli);
415+
416+
/* some cleanup */
417+
pg_free(history);
418+
PQclear(res);
419+
420+
if (result)
421+
tlinfo = timelineInfoNew(tli);
422+
else
423+
return NULL;
424+
425+
/* transform TimeLineHistoryEntry into timelineInfo */
426+
for (i = parray_num(result) -1; i >= 0; i--)
427+
{
428+
TimeLineHistoryEntry *tln = (TimeLineHistoryEntry *) parray_get(result, i);
429+
430+
tlinfo->parent_tli = tln->tli;
431+
tlinfo->switchpoint = tln->end;
432+
433+
if (!tli_list)
434+
tli_list = parray_new();
435+
436+
parray_append(tli_list, tlinfo);
437+
438+
/* Next tli */
439+
tlinfo = timelineInfoNew(tln->tli);
440+
441+
/* oldest tli */
442+
if (i == 0)
443+
{
444+
tlinfo->tli = tln->tli;
445+
tlinfo->parent_tli = 0;
446+
tlinfo->switchpoint = 0;
447+
parray_append(tli_list, tlinfo);
448+
}
449+
}
450+
451+
/* link parent to child */
452+
for (i = 0; i < parray_num(tli_list); i++)
453+
{
454+
timelineInfo *tlinfo = (timelineInfo *) parray_get(tli_list, i);
455+
456+
for (j = 0; j < parray_num(tli_list); j++)
457+
{
458+
timelineInfo *tlinfo_parent = (timelineInfo *) parray_get(tli_list, j);
459+
460+
if (tlinfo->parent_tli == tlinfo_parent->tli)
461+
{
462+
tlinfo->parent_link = tlinfo_parent;
463+
break;
464+
}
465+
}
466+
}
467+
468+
/* add backups to each timeline info */
469+
for (i = 0; i < parray_num(tli_list); i++)
470+
{
471+
timelineInfo *tlinfo = parray_get(tli_list, i);
472+
for (j = 0; j < parray_num(backup_list); j++)
473+
{
474+
pgBackup *backup = parray_get(backup_list, j);
475+
if (tlinfo->tli == backup->tli)
476+
{
477+
if (tlinfo->backups == NULL)
478+
tlinfo->backups = parray_new();
479+
parray_append(tlinfo->backups, backup);
480+
}
481+
}
482+
}
483+
484+
/* cleanup */
485+
parray_walk(result, pg_free);
486+
pg_free(result);
487+
488+
return tli_list;
489+
}
490+
491+
parray*
492+
parse_tli_history_buffer(char *history, TimeLineID tli)
493+
{
494+
char *curLine = history;
495+
TimeLineHistoryEntry *entry;
496+
TimeLineHistoryEntry *last_timeline = NULL;
497+
parray *result = NULL;
498+
499+
/* Parse timeline history buffer string by string */
500+
while (curLine)
501+
{
502+
char tempStr[1024];
503+
char *nextLine = strchr(curLine, '\n');
504+
int curLineLen = nextLine ? (nextLine-curLine) : strlen(curLine);
505+
506+
memcpy(tempStr, curLine, curLineLen);
507+
tempStr[curLineLen] = '\0'; // NUL-terminate!
508+
curLine = nextLine ? (nextLine+1) : NULL;
509+
510+
if (curLineLen > 0)
511+
{
512+
char *ptr;
513+
TimeLineID tli;
514+
uint32 switchpoint_hi;
515+
uint32 switchpoint_lo;
516+
int nfields;
517+
518+
for (ptr = tempStr; *ptr; ptr++)
519+
{
520+
if (!isspace((unsigned char) *ptr))
521+
break;
522+
}
523+
if (*ptr == '\0' || *ptr == '#')
524+
continue;
525+
526+
nfields = sscanf(tempStr, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
527+
528+
if (nfields < 1)
529+
{
530+
/* expect a numeric timeline ID as first field of line */
531+
elog(ERROR, "Syntax error in timeline history: \"%s\". Expected a numeric timeline ID.", tempStr);
532+
}
533+
if (nfields != 3)
534+
elog(ERROR, "Syntax error in timeline history: \"%s\". Expected a transaction log switchpoint location.", tempStr);
535+
536+
if (last_timeline && tli <= last_timeline->tli)
537+
elog(ERROR, "Timeline IDs must be in increasing sequence: \"%s\"", tempStr);
538+
539+
entry = pgut_new(TimeLineHistoryEntry);
540+
entry->tli = tli;
541+
entry->end = ((uint64) switchpoint_hi << 32) | switchpoint_lo;
542+
543+
last_timeline = entry;
544+
/* Build list with newest item first */
545+
if (!result)
546+
result = parray_new();
547+
parray_append(result, entry);
548+
549+
/* we ignore the remainder of each line */
550+
}
551+
}
552+
553+
return result;
554+
}
555+
356556
/*
357557
* Maybe add a StreamOptions struct ?
358558
* Backup conn only needed to calculate stream_stop_timeout. Think about refactoring it.
@@ -374,7 +574,8 @@ start_WAL_streaming(PGconn *backup_conn, char *stream_dst_path, ConnectionOption
374574
stream_thread_arg.conn = pgut_connect_replication(conn_opt->pghost,
375575
conn_opt->pgport,
376576
conn_opt->pgdatabase,
377-
conn_opt->pguser);
577+
conn_opt->pguser,
578+
true);
378579
/* sanity check*/
379580
IdentifySystem(&stream_thread_arg);
380581

src/utils/pgut.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,8 @@ pgut_connect(const char *host, const char *port,
270270

271271
PGconn *
272272
pgut_connect_replication(const char *host, const char *port,
273-
const char *dbname, const char *username)
273+
const char *dbname, const char *username,
274+
bool strict)
274275
{
275276
PGconn *tmpconn;
276277
int argcount = 7; /* dbname, replication, fallback_app_name,
@@ -356,7 +357,7 @@ pgut_connect_replication(const char *host, const char *port,
356357
continue;
357358
}
358359

359-
elog(ERROR, "could not connect to database %s: %s",
360+
elog(strict ? ERROR : WARNING, "could not connect to database %s: %s",
360361
dbname, PQerrorMessage(tmpconn));
361362
PQfinish(tmpconn);
362363
free(values);

src/utils/pgut.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ extern char *pgut_get_conninfo_string(PGconn *conn);
4040
extern PGconn *pgut_connect(const char *host, const char *port,
4141
const char *dbname, const char *username);
4242
extern PGconn *pgut_connect_replication(const char *host, const char *port,
43-
const char *dbname,
44-
const char *username);
43+
const char *dbname, const char *username,
44+
bool strict);
4545
extern void pgut_disconnect(PGconn *conn);
4646
extern void pgut_disconnect_callback(bool fatal, void *userdata);
4747
extern PGresult *pgut_execute(PGconn* conn, const char *query, int nParams,

0 commit comments

Comments
 (0)