PostgreSQL Source Code git master
pg_rewind.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_rewind.c
4 * Synchronizes a PostgreSQL data directory to a new timeline
5 *
6 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 *
8 *-------------------------------------------------------------------------
9 */
10#include "postgres_fe.h"
11
12#include <sys/stat.h>
13#include <fcntl.h>
14#include <time.h>
15#include <unistd.h>
16
17#include "access/timeline.h"
19#include "catalog/catversion.h"
20#include "catalog/pg_control.h"
22#include "common/file_perm.h"
24#include "common/string.h"
28#include "file_ops.h"
29#include "filemap.h"
30#include "getopt_long.h"
31#include "pg_rewind.h"
32#include "rewind_source.h"
33#include "storage/bufpage.h"
34
/*
 * Forward declarations of the file-local functions defined further down.
 *
 * NOTE(review): two declarations below have lost their first line in this
 * copy -- the one ending in "const char *content, size_t size);" and the one
 * beginning at "int a_nentries,".  Restore them from the upstream file.
 */
35static void usage(const char *progname);
36
37static void perform_rewind(filemap_t *filemap, rewind_source *source,
38 XLogRecPtr chkptrec,
39 TimeLineID chkpttli,
40 XLogRecPtr chkptredo);
41
42static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli,
43 XLogRecPtr checkpointloc);
44
46 const char *content, size_t size);
47static void getRestoreCommand(const char *argv0);
48static void sanityChecks(void);
49static TimeLineHistoryEntry *getTimelineHistory(TimeLineID tli, bool is_source,
50 int *nentries);
52 int a_nentries,
53 TimeLineHistoryEntry *b_history,
54 int b_nentries,
55 XLogRecPtr *recptr, int *tliIndex);
56static void ensureCleanShutdown(const char *argv0);
57static void disconnect_atexit(void);
58
62
/* Program name, set in main() from get_progname(argv[0]) */
63static const char *progname;
65
66/*
 * Configuration options.
 *
 * datadir_target, datadir_source, connstr_source and config_file correspond
 * to --target-pgdata, --source-pgdata, --source-server and --config-file;
 * main() requires a target and exactly one of the two source options.
 * restore_command is filled in by getRestoreCommand() when restore_wal is set.
 */
67char *datadir_target = NULL;
68static char *datadir_source = NULL;
69static char *connstr_source = NULL;
70static char *restore_command = NULL;
71static char *config_file = NULL;
72
/*
 * Flags set while parsing the command line in main(): --debug, -P, -n,
 * -N (clears do_sync) and -c respectively.
 */
73static bool debug = false;
74bool showprogress = false;
75bool dry_run = false;
76bool do_sync = true;
77static bool restore_wal = false;
79
80/*
 * Target history
 *
 * NOTE(review): no declarations follow this heading in this copy, although
 * main() uses targetHistory and targetNentries -- presumably dropped.
 */
83
84/*
 * Progress counters
 *
 * NOTE(review): fetch_size and fetch_done are used by main() and
 * progress_report() but their declarations are not visible here.
 */
87
/* libpq connection to the source; closed in main() and disconnect_atexit() */
88static PGconn *conn;
90
/*
 * Print the command-line help text of pg_rewind to stdout.
 *
 * progname is substituted into the summary line and the "Usage:" line.
 * Every message goes through _() so that it can be translated; each
 * printf() call is therefore one translatable unit and the literals must
 * not be split or merged casually.  The text ends with the bug-report
 * address and the project home page taken from the PACKAGE_* macros.
 */
91static void
92usage(const char *progname)
93{
94 printf(_("%s resynchronizes a PostgreSQL cluster with another copy of the cluster.\n\n"), progname);
95 printf(_("Usage:\n %s [OPTION]...\n\n"), progname);
96 printf(_("Options:\n"));
97 printf(_(" -c, --restore-target-wal use \"restore_command\" in target configuration to\n"
98 " retrieve WAL files from archives\n"));
99 printf(_(" -D, --target-pgdata=DIRECTORY existing data directory to modify\n"));
100 printf(_(" --source-pgdata=DIRECTORY source data directory to synchronize with\n"));
101 printf(_(" --source-server=CONNSTR source server to synchronize with\n"));
102 printf(_(" -n, --dry-run stop before modifying anything\n"));
103 printf(_(" -N, --no-sync do not wait for changes to be written\n"
104 " safely to disk\n"));
105 printf(_(" -P, --progress write progress messages\n"));
106 printf(_(" -R, --write-recovery-conf write configuration for replication\n"
107 " (requires --source-server)\n"));
108 printf(_(" --config-file=FILENAME use specified main server configuration\n"
109 " file when running target cluster\n"));
110 printf(_(" --debug write a lot of debug messages\n"));
111 printf(_(" --no-ensure-shutdown do not automatically fix unclean shutdown\n"));
112 printf(_(" --sync-method=METHOD set method for syncing files to disk\n"));
113 printf(_(" -V, --version output version information, then exit\n"));
114 printf(_(" -?, --help show this help, then exit\n"));
/* trailer: where to report bugs and where the project lives */
115 printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
116 printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL);
117}
118
119
/*
 * Entry point of pg_rewind.
 *
 * Visible flow: parse and validate the command line, refuse to run as root
 * (non-Windows), look up restore_command if requested, fetch the source
 * control file, run sanityChecks(), compare the source and target timelines,
 * find the divergence point and the last common checkpoint, build the file
 * map, and finally call perform_rewind().  Returns 0 on success; error
 * paths call exit(1) or pg_fatal().
 *
 * NOTE(review): a number of statements appear to have been dropped from
 * this copy (option arguments are never stored, "source", "targetHistory"
 * and "targetNentries" are used without a visible declaration or
 * assignment, several calls have lost their condition or arguments).  The
 * notes below mark each gap; confirm against the upstream file.
 */
120int
121main(int argc, char **argv)
122{
/* long options without a short letter use the small integers 1-6 as codes */
123 static struct option long_options[] = {
124 {"help", no_argument, NULL, '?'},
125 {"target-pgdata", required_argument, NULL, 'D'},
126 {"write-recovery-conf", no_argument, NULL, 'R'},
127 {"source-pgdata", required_argument, NULL, 1},
128 {"source-server", required_argument, NULL, 2},
129 {"no-ensure-shutdown", no_argument, NULL, 4},
130 {"config-file", required_argument, NULL, 5},
131 {"version", no_argument, NULL, 'V'},
132 {"restore-target-wal", no_argument, NULL, 'c'},
133 {"dry-run", no_argument, NULL, 'n'},
134 {"no-sync", no_argument, NULL, 'N'},
135 {"progress", no_argument, NULL, 'P'},
136 {"debug", no_argument, NULL, 3},
137 {"sync-method", required_argument, NULL, 6},
138 {NULL, 0, NULL, 0}
139 };
140 int option_index;
141 int c;
142 XLogRecPtr divergerec;
143 int lastcommontliIndex;
144 XLogRecPtr chkptrec;
145 TimeLineID chkpttli;
146 XLogRecPtr chkptredo;
147 TimeLineID source_tli;
148 TimeLineID target_tli;
149 XLogRecPtr target_wal_endrec;
150 XLogSegNo last_common_segno;
151 size_t size;
152 char *buffer;
153 bool no_ensure_shutdown = false;
154 bool rewind_needed;
155 bool writerecoveryconf = false;
156 filemap_t *filemap;
157
158 pg_logging_init(argv[0]);
159 set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_rewind"));
160 progname = get_progname(argv[0]);
161
162 /* Process command-line arguments */
/* --help and --version are honoured only as the first argument */
163 if (argc > 1)
164 {
165 if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
166 {
/* NOTE(review): no call printing the help text is visible before this exit; presumably usage(progname) -- verify */
168 exit(0);
169 }
170 if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
171 {
172 puts("pg_rewind (PostgreSQL) " PG_VERSION);
173 exit(0);
174 }
175 }
176
177 while ((c = getopt_long(argc, argv, "cD:nNPR", long_options, &option_index)) != -1)
178 {
179 switch (c)
180 {
181 case 'c':
182 restore_wal = true;
183 break;
184
185 case 'P':
186 showprogress = true;
187 break;
188
189 case 'n':
190 dry_run = true;
191 break;
192
193 case 'N':
194 do_sync = false;
195 break;
196
197 case 'R':
198 writerecoveryconf = true;
199 break;
200
/* --debug */
201 case 3:
202 debug = true;
204 break;
205
/*
 * NOTE(review): the cases for 'D', 1, 2 and 5 contain only "break" in this
 * copy; the statements storing optarg into datadir_target, datadir_source,
 * connstr_source and config_file appear to be missing.
 */
206 case 'D': /* -D or --target-pgdata */
208 break;
209
210 case 1: /* --source-pgdata */
212 break;
213
214 case 2: /* --source-server */
216 break;
217
/* --no-ensure-shutdown */
218 case 4:
219 no_ensure_shutdown = true;
220 break;
221
/* --config-file */
222 case 5:
224 break;
225
/*
 * --sync-method.  NOTE(review): as shown this exits with status 1
 * unconditionally; the condition guarding the exit appears to be missing.
 */
226 case 6:
228 exit(1);
229 break;
230
231 default:
232 /* getopt_long already emitted a complaint */
233 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
234 exit(1);
235 }
236 }
237
/* Validate the option combination: exactly one source, and a target */
238 if (datadir_source == NULL && connstr_source == NULL)
239 {
240 pg_log_error("no source specified (--source-pgdata or --source-server)");
241 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
242 exit(1);
243 }
244
245 if (datadir_source != NULL && connstr_source != NULL)
246 {
247 pg_log_error("only one of --source-pgdata or --source-server can be specified");
248 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
249 exit(1);
250 }
251
252 if (datadir_target == NULL)
253 {
254 pg_log_error("no target data directory specified (--target-pgdata)");
255 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
256 exit(1);
257 }
258
/* -R is only accepted together with --source-server */
259 if (writerecoveryconf && connstr_source == NULL)
260 {
261 pg_log_error("no source server information (--source-server) specified for --write-recovery-conf");
262 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
263 exit(1);
264 }
265
/* any leftover non-option argument is an error */
266 if (optind < argc)
267 {
268 pg_log_error("too many command-line arguments (first is \"%s\")",
269 argv[optind]);
270 pg_log_error_hint("Try \"%s --help\" for more information.", progname);
271 exit(1);
272 }
273
274 /*
275 * Don't allow pg_rewind to be run as root, to avoid overwriting the
276 * ownership of files in the data directory. We need only check for root
277 * -- any other user won't have sufficient permissions to modify files in
278 * the data directory.
279 */
280#ifndef WIN32
281 if (geteuid() == 0)
282 {
283 pg_log_error("cannot be executed by \"root\"");
284 pg_log_error_hint("You must run %s as the PostgreSQL superuser.",
285 progname);
286 exit(1);
287 }
288#endif
289
291
292 /* Set mask based on PGDATA permissions */
/* NOTE(review): the condition guarding this pg_fatal and its directory argument are missing in this copy */
294 pg_fatal("could not read permissions of directory \"%s\": %m",
296
297 umask(pg_mode_mask);
298
299 getRestoreCommand(argv[0]);
300
/* make sure the source connection is closed on every exit path */
301 atexit(disconnect_atexit);
302
303 /* Ok, we have all the options and we're ready to start. */
304 if (dry_run)
305 pg_log_info("Executing in dry-run mode.\n"
306 "The target directory will not be modified.");
307
308 /* First, connect to remote server. */
/*
 * NOTE(review): the statements that open the connection and initialise
 * "source" for either kind of source are missing from both branches below.
 */
309 if (connstr_source)
310 {
312
315
316 if (showprogress)
317 pg_log_info("connected to server");
318
320 }
321 else
323
324 /*
325 * Check the status of the target instance.
326 *
327 * If the target instance was not cleanly shut down, start and stop the
328 * target cluster once in single-user mode to enforce recovery to finish,
329 * ensuring that the cluster can be used by pg_rewind. Note that if
330 * no_ensure_shutdown is specified, pg_rewind ignores this step, and users
331 * need to make sure by themselves that the target cluster is in a clean
332 * state.
333 */
/*
 * NOTE(review): the reads of the target control file into "buffer" are not
 * visible (buffer is freed without a visible assignment), and the rest of
 * the condition after "!no_ensure_shutdown &&" is missing.
 */
336 pg_free(buffer);
337
338 if (!no_ensure_shutdown &&
341 {
342 ensureCleanShutdown(argv[0]);
343
346 pg_free(buffer);
347 }
348
/* fetch the source control file; the call consuming "buffer" is not visible here */
349 buffer = source->fetch_file(source, XLOG_CONTROL_FILE, &size);
351 pg_free(buffer);
352
353 sanityChecks();
354
355 /*
356 * Usually, the TLI can be found in the latest checkpoint record. But if
357 * the source server is just being promoted (or it's a standby that's
358 * following a primary that's just being promoted), and the checkpoint
359 * requested by the promotion hasn't completed yet, the latest timeline is
360 * in minRecoveryPoint. So we check which is later, the TLI of the
361 * minRecoveryPoint or the latest checkpoint.
362 */
/* NOTE(review): the assignments of source_tli and target_tli are missing in this copy */
365
366 /* Similarly for the target. */
369
370 /*
371 * Find the common ancestor timeline between the clusters.
372 *
373 * If both clusters are already on the same timeline, there's nothing to
374 * do.
375 */
376 if (target_tli == source_tli)
377 {
378 pg_log_info("source and target cluster are on the same timeline");
379 rewind_needed = false;
/* set only so it is initialised; this path exits before the value is read */
380 target_wal_endrec = 0;
381 }
382 else
383 {
384 XLogRecPtr chkptendrec;
385 TimeLineHistoryEntry *sourceHistory;
386 int sourceNentries;
387
388 /*
389 * Retrieve timelines for both source and target, and find the point
390 * where they diverged.
391 */
392 sourceHistory = getTimelineHistory(source_tli, true, &sourceNentries);
393 targetHistory = getTimelineHistory(target_tli, false, &targetNentries);
394
/* NOTE(review): the target-history argument pair of this call is missing in this copy */
395 findCommonAncestorTimeline(sourceHistory, sourceNentries,
397 &divergerec, &lastcommontliIndex);
398
399 pg_log_info("servers diverged at WAL location %X/%08X on timeline %u",
400 LSN_FORMAT_ARGS(divergerec),
401 targetHistory[lastcommontliIndex].tli);
402
403 /*
404 * Convert the divergence LSN to a segment number, that will be used
405 * to decide how WAL segments should be processed.
406 */
407 XLByteToSeg(divergerec, last_common_segno, ControlFile_target.xlog_seg_size);
408
409 /*
410 * Don't need the source history anymore. The target history is still
411 * needed by the routines in parsexlog.c, when we read the target WAL.
412 */
413 pfree(sourceHistory);
414
415
416 /*
417 * Determine the end-of-WAL on the target.
418 *
419 * The WAL ends at the last shutdown checkpoint, or at
420 * minRecoveryPoint if it was a standby. (If we supported rewinding a
421 * server that was not shut down cleanly, we would need to replay
422 * until we reach the first invalid record, like crash recovery does.)
423 */
424
425 /* read the checkpoint record on the target to see where it ends. */
/* NOTE(review): two arguments and the closing of this call are missing in this copy */
426 chkptendrec = readOneRecord(datadir_target,
428 targetNentries - 1,
430
/* the target's WAL ends at whichever is later: minRecoveryPoint or the checkpoint's end */
431 if (ControlFile_target.minRecoveryPoint > chkptendrec)
432 {
433 target_wal_endrec = ControlFile_target.minRecoveryPoint;
434 }
435 else
436 {
437 target_wal_endrec = chkptendrec;
438 }
439
440 /*
441 * Check for the possibility that the target is in fact a direct
442 * ancestor of the source. In that case, there is no divergent history
443 * in the target that needs rewinding.
444 */
445 if (target_wal_endrec > divergerec)
446 {
447 rewind_needed = true;
448 }
449 else
450 {
451 /* the last common checkpoint record must be part of target WAL */
452 Assert(target_wal_endrec == divergerec);
453
454 rewind_needed = false;
455 }
456 }
457
458 if (!rewind_needed)
459 {
460 pg_log_info("no rewind required");
/* NOTE(review): original lines 461-464 are absent here; verify what runs before this exit */
465 exit(0);
466 }
467
468 /* Initialize hashtable that tracks WAL files protected from removal */
469 keepwal_init();
470
/* locate the last checkpoint before the divergence point; outputs chkptrec, chkpttli, chkptredo */
471 findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex,
472 &chkptrec, &chkpttli, &chkptredo, restore_command);
473 pg_log_info("rewinding from last common checkpoint at %X/%08X on timeline %u",
474 LSN_FORMAT_ARGS(chkptrec), chkpttli);
475
476 /* Initialize the hash table to track the status of each file */
/* NOTE(review): the initialisation call itself is missing in this copy */
478
479 /*
480 * Collect information about all files in both data directories.
481 */
/* NOTE(review): the traversal calls following each progress message are missing in this copy */
482 if (showprogress)
483 pg_log_info("reading source file list");
485
486 if (showprogress)
487 pg_log_info("reading target file list");
489
490 /*
491 * Read the target WAL from last checkpoint before the point of fork, to
492 * extract all the pages that were modified on the target cluster after
493 * the fork.
494 */
495 if (showprogress)
496 pg_log_info("reading WAL in target");
497 extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
498 target_wal_endrec, restore_command);
499
500 /*
501 * We have collected all information we need from both systems. Decide
502 * what to do with each file.
503 */
504 filemap = decide_file_actions(last_common_segno);
505 if (showprogress)
506 calculate_totals(filemap);
507
508 /* this is too verbose even for verbose mode */
509 if (debug)
510 print_filemap(filemap);
511
512 /*
513 * Ok, we're ready to start copying things over.
514 */
515 if (showprogress)
516 {
517 pg_log_info("need to copy %lu MB (total source directory size is %lu MB)",
518 (unsigned long) (filemap->fetch_size / (1024 * 1024)),
519 (unsigned long) (filemap->total_size / (1024 * 1024)));
520
/* seed the counters that progress_report() reads */
521 fetch_size = filemap->fetch_size;
522 fetch_done = 0;
523 }
524
525 /*
526 * We have now collected all the information we need from both systems,
527 * and we are ready to start modifying the target directory.
528 *
529 * This is the point of no return. Once we start copying things, there is
530 * no turning back!
531 */
532 perform_rewind(filemap, source, chkptrec, chkpttli, chkptredo);
533
534 if (showprogress)
535 pg_log_info("syncing target data directory");
/* NOTE(review): the sync call announced by the message above is missing in this copy */
537
538 /* Also update the standby configuration, if requested. */
/* NOTE(review): the code acting on writerecoveryconf is missing in this copy */
543
544 /* don't need the source connection anymore */
546 if (conn)
547 {
548 PQfinish(conn);
/* clear the pointer so disconnect_atexit() does not finish it a second time */
549 conn = NULL;
550 }
551
552 pg_log_info("Done!");
553
554 return 0;
555}
556
557/*
558 * Perform the rewind.
559 *
560 * We have already collected all the information we need from the
561 * target and the source.
 *
 * Visible flow: walk every entry of the file map, queue fetches of the
 * modified blocks and carry out the per-file action; report final progress;
 * fetch the source control file again; create the backup label; then build
 * a new control file from the source's post-copy control file with
 * minRecoveryPoint/minRecoveryPointTLI set to endrec/endtli and the state
 * set to DB_IN_ARCHIVE_RECOVERY, and write it unless in dry-run mode.
 *
 * Per the forward declaration, the parameters are the file map, the rewind
 * source, and the location, timeline and redo pointer of the last common
 * checkpoint (chkptrec, chkpttli, chkptredo).
 *
 * NOTE(review): the declarator line carrying the function name, several
 * "case" labels, and the assignments of endrec/endtli are missing from this
 * copy; the notes below mark the gaps.
562 */
563static void
565 XLogRecPtr chkptrec,
566 TimeLineID chkpttli,
567 XLogRecPtr chkptredo)
568{
569 XLogRecPtr endrec;
570 TimeLineID endtli;
571 ControlFileData ControlFile_new;
572 size_t size;
573 char *buffer;
574
575 /*
576 * Execute the actions in the file map, fetching data from the source
577 * system as needed.
578 */
579 for (int i = 0; i < filemap->nentries; i++)
580 {
581 file_entry_t *entry = filemap->entries[i];
582
583 /*
584 * If this is a relation file, copy the modified blocks.
585 *
586 * This is in addition to any other changes.
587 */
/* NOTE(review): the "if" condition and the declaration/initialisation of "iter" are missing in this copy */
589 {
591 BlockNumber blkno;
592 off_t offset;
593
595 while (datapagemap_next(iter, &blkno))
596 {
/* byte offset of the block inside the file */
597 offset = blkno * BLCKSZ;
598 source->queue_fetch_range(source, entry->path, offset, BLCKSZ);
599 }
600 pg_free(iter);
601 }
602
603 switch (entry->action)
604 {
605 case FILE_ACTION_NONE:
606 /* nothing else to do */
607 break;
608
609 case FILE_ACTION_COPY:
/* NOTE(review): the statement for this case is missing in this copy */
611 break;
612
/* NOTE(review): the "case" labels for the remaining branches are missing in this copy */
614 truncate_target_file(entry->path, entry->source_size);
615 break;
616
/* tail of a call whose first line is missing; its arguments are the target size and the size difference */
619 entry->target_size,
620 entry->source_size - entry->target_size);
621 break;
622
624 remove_target(entry);
625 break;
626
628 create_target(entry);
629 break;
630
632 pg_fatal("no action decided for file \"%s\"", entry->path);
633 break;
634 }
635 }
636
637 /* Complete any remaining range-fetches that we queued up above. */
/* NOTE(review): the call that completes the queued fetches is missing in this copy */
639
641
/* final progress report; "true" marks it as the last one */
642 progress_report(true);
643
644 /*
645 * Fetch the control file from the source last. This ensures that the
646 * minRecoveryPoint is up-to-date.
647 */
648 buffer = source->fetch_file(source, XLOG_CONTROL_FILE, &size);
/* NOTE(review): the call consuming "buffer" is missing in this copy */
650 pg_free(buffer);
651
652 /*
653 * Sanity check: If the source is a local system, the control file should
654 * not have changed since we started.
655 *
656 * XXX: We assume it hasn't been modified, but actually, what could go
657 * wrong? The logic handles a libpq source that's modified concurrently,
658 * why not a local datadir?
659 */
/* NOTE(review): the first line of the comparison below is missing in this copy */
660 if (datadir_source &&
662 sizeof(ControlFileData)) != 0)
663 {
664 pg_fatal("source system was modified while pg_rewind was running");
665 }
666
667 if (showprogress)
668 pg_log_info("creating backup label and updating control file");
669
670 /*
671 * Create a backup label file, to tell the target where to begin the WAL
672 * replay. Normally, from the last common checkpoint between the source
673 * and the target. But if the source is a standby server, it's possible
674 * that the last common checkpoint is *after* the standby's restartpoint.
675 * That implies that the source server has applied the checkpoint record,
676 * but hasn't performed a corresponding restartpoint yet. Make sure we
677 * start at the restartpoint's redo point in that case.
678 *
679 * Use the old version of the source's control file for this. The server
680 * might have finished the restartpoint after we started copying files,
681 * but we must begin from the redo point at the time that started copying.
682 */
683 if (ControlFile_source.checkPointCopy.redo < chkptredo)
684 {
/* NOTE(review): the body of this branch is missing in this copy */
688 }
689 createBackupLabel(chkptredo, chkpttli, chkptrec);
690
691 /*
692 * Update control file of target, to tell the target how far it must
693 * replay the WAL (minRecoveryPoint).
694 */
/*
 * NOTE(review): in all three branches below the statements assigning endrec
 * and endtli are missing in this copy, as are the inner "if" conditions.
 */
695 if (connstr_source)
696 {
697 /*
698 * The source is a live server. Like in an online backup, it's
699 * important that we recover all the WAL that was generated while we
700 * were copying files.
701 */
703 {
704 /*
705 * Source is a standby server. We must replay to its
706 * minRecoveryPoint.
707 */
710 }
711 else
712 {
713 /*
714 * Source is a production, non-standby, server. We must replay to
715 * the last WAL insert location.
716 */
718 pg_fatal("source system was in unexpected state at end of rewind");
719
723 }
724 }
725 else
726 {
727 /*
728 * Source is a local data directory. It should've shut down cleanly,
729 * and we must replay to the latest shutdown checkpoint.
730 */
733 }
734
/* start from the source's post-copy control file and override the recovery fields */
735 memcpy(&ControlFile_new, &ControlFile_source_after, sizeof(ControlFileData));
736 ControlFile_new.minRecoveryPoint = endrec;
737 ControlFile_new.minRecoveryPointTLI = endtli;
738 ControlFile_new.state = DB_IN_ARCHIVE_RECOVERY;
/* the target is left untouched in dry-run mode */
739 if (!dry_run)
740 update_controlfile(datadir_target, &ControlFile_new, do_sync);
741}
742
/*
 * Verify that source and target are suitable for a rewind; every failed
 * check ends in pg_fatal().
 *
 * Judging by the messages, the checks cover: matching system identifiers,
 * compatible versions, data checksums or wal_log_hints on the target, a
 * cleanly shut down target, and -- for a data-directory source -- a cleanly
 * shut down source.
 *
 * NOTE(review): the declarator line (sanityChecks(void), per the forward
 * declaration) and the condition of every check are missing from this copy;
 * only the messages and braces remain.  Restore from the upstream file.
 */
743static void
745{
746 /* TODO Check that there's no backup_label in either cluster */
747
748 /* Check system_identifier match */
750 pg_fatal("source and target clusters are from different systems");
751
752 /* check version */
757 {
758 pg_fatal("clusters are not compatible with this version of pg_rewind");
759 }
760
761 /*
762 * The target cluster needs to use checksums or hint-bit WAL logging, to
763 * prevent data corruption that could occur because of hint bits.
764 */
767 {
768 pg_fatal("target server needs to use either data checksums or \"wal_log_hints = on\"");
769 }
770
771 /*
772 * Target cluster better not be running. This doesn't guard against
773 * someone starting the cluster concurrently. Also, this is probably more
774 * strict than necessary; it's OK if the target node was not shut down
775 * cleanly, as long as it isn't running at the moment.
776 */
779 pg_fatal("target server must be shut down cleanly");
780
781 /*
782 * When the source is a data directory, also require that the source
783 * server is shut down. There isn't any very strong reason for this
784 * limitation, but better safe than sorry.
785 */
/* NOTE(review): the remainder of this condition is missing in this copy */
786 if (datadir_source &&
789 pg_fatal("source data directory must be shut down cleanly");
790}
791
792/*
793 * Print a progress report based on the fetch_size and fetch_done variables.
794 *
795 * Progress report is written at maximum once per second, except that the
796 * last progress report is always printed.
797 *
798 * If finished is set to true, this is the last progress report. The cursor
799 * is moved to the next line.
800 */
801void
802progress_report(bool finished)
803{
805 int percent;
806 char fetch_done_str[32];
807 char fetch_size_str[32];
809
810 if (!showprogress)
811 return;
812
813 now = time(NULL);
814 if (now == last_progress_report && !finished)
815 return; /* Max once per second */
816
818 percent = fetch_size ? (int) ((fetch_done) * 100 / fetch_size) : 0;
819
820 /*
821 * Avoid overflowing past 100% or the full size. This may make the total
822 * size number change as we approach the end of the backup (the estimate
823 * will always be wrong if WAL is included), but that's better than having
824 * the done column be bigger than the total.
825 */
826 if (percent > 100)
827 percent = 100;
830
831 snprintf(fetch_done_str, sizeof(fetch_done_str), UINT64_FORMAT,
832 fetch_done / 1024);
833 snprintf(fetch_size_str, sizeof(fetch_size_str), UINT64_FORMAT,
834 fetch_size / 1024);
835
836 fprintf(stderr, _("%*s/%s kB (%d%%) copied"),
837 (int) strlen(fetch_size_str), fetch_done_str, fetch_size_str,
838 percent);
839
840 /*
841 * Stay on the same line if reporting to a terminal and we're not done
842 * yet.
843 */
844 fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr);
845}
846
847/*
848 * Find minimum from two WAL locations assuming InvalidXLogRecPtr means
849 * infinity as src/include/access/timeline.h states. This routine should
850 * be used only when comparing WAL locations related to history files.
851 */
852static XLogRecPtr
854{
855 if (!XLogRecPtrIsValid(a))
856 return b;
857 else if (!XLogRecPtrIsValid(b))
858 return a;
859 else
860 return Min(a, b);
861}
862
863/*
864 * Retrieve timeline history for the source or target system.
865 */
867getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
868{
869 TimeLineHistoryEntry *history;
870
871 /*
872 * Timeline 1 does not have a history file, so there is no need to check
873 * and fake an entry with infinite start and end positions.
874 */
875 if (tli == 1)
876 {
878 history->tli = tli;
879 history->begin = history->end = InvalidXLogRecPtr;
880 *nentries = 1;
881 }
882 else
883 {
884 char path[MAXPGPATH];
885 char *histfile;
886
887 TLHistoryFilePath(path, tli);
888
889 /* Get history file from appropriate source */
890 if (is_source)
891 histfile = source->fetch_file(source, path, NULL);
892 else
893 histfile = slurpFile(datadir_target, path, NULL);
894
895 history = rewind_parseTimeLineHistory(histfile, tli, nentries);
896 pg_free(histfile);
897 }
898
899 /* In debugging mode, print what we read */
900 if (debug)
901 {
902 int i;
903
904 if (is_source)
905 pg_log_debug("Source timeline history:");
906 else
907 pg_log_debug("Target timeline history:");
908
909 for (i = 0; i < *nentries; i++)
910 {
912
913 entry = &history[i];
914 pg_log_debug("%u: %X/%08X - %X/%08X", entry->tli,
915 LSN_FORMAT_ARGS(entry->begin),
916 LSN_FORMAT_ARGS(entry->end));
917 }
918 }
919
920 return history;
921}
922
923/*
924 * Determine the TLI of the last common timeline in the timeline history of
925 * two clusters. *tliIndex is set to the index of last common timeline in
926 * the arrays, and *recptr is set to the position where the timeline history
927 * diverged (ie. the first WAL record that's not the same in both clusters).
928 */
929static void
931 TimeLineHistoryEntry *b_history, int b_nentries,
932 XLogRecPtr *recptr, int *tliIndex)
933{
934 int i,
935 n;
936
937 /*
938 * Trace the history forward, until we hit the timeline diverge. It may
939 * still be possible that the source and target nodes used the same
940 * timeline number in their history but with different start position
941 * depending on the history files that each node has fetched in previous
942 * recovery processes. Hence check the start position of the new timeline
943 * as well and move down by one extra timeline entry if they do not match.
944 */
945 n = Min(a_nentries, b_nentries);
946 for (i = 0; i < n; i++)
947 {
948 if (a_history[i].tli != b_history[i].tli ||
949 a_history[i].begin != b_history[i].begin)
950 break;
951 }
952
953 if (i > 0)
954 {
955 i--;
956 *recptr = MinXLogRecPtr(a_history[i].end, b_history[i].end);
957 *tliIndex = i;
958 return;
959 }
960 else
961 {
962 pg_fatal("could not find common ancestor of the source and target cluster's timelines");
963 }
964}
965
966
967/*
968 * Create a backup_label file that forces recovery to begin at the last common
969 * checkpoint.
970 */
971static void
972createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
973{
974 XLogSegNo startsegno;
975 time_t stamp_time;
976 char strfbuf[128];
977 char xlogfilename[MAXFNAMELEN];
978 struct tm *tmp;
979 char buf[1000];
980 int len;
981
982 XLByteToSeg(startpoint, startsegno, WalSegSz);
983 XLogFileName(xlogfilename, starttli, startsegno, WalSegSz);
984
985 /*
986 * Construct backup label file
987 */
988 stamp_time = time(NULL);
989 tmp = localtime(&stamp_time);
990 strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp);
991
992 len = snprintf(buf, sizeof(buf),
993 "START WAL LOCATION: %X/%08X (file %s)\n"
994 "CHECKPOINT LOCATION: %X/%08X\n"
995 "BACKUP METHOD: pg_rewind\n"
996 "BACKUP FROM: standby\n"
997 "START TIME: %s\n",
998 /* omit LABEL: line */
999 LSN_FORMAT_ARGS(startpoint), xlogfilename,
1000 LSN_FORMAT_ARGS(checkpointloc),
1001 strfbuf);
1002 if (len >= sizeof(buf))
1003 pg_fatal("backup label buffer too small"); /* shouldn't happen */
1004
1005 /* TODO: move old file out of the way, if any. */
1006 open_target_file("backup_label", true); /* BACKUP_LABEL_FILE */
1009}
1010
1011/*
1012 * Check CRC of control file
 *
 * The computed CRC is finalised with FIN_CRC32C and compared against the
 * crc field stored in *ControlFile; a mismatch is reported via pg_fatal().
 *
 * NOTE(review): the declarator line (function name and the ControlFile
 * parameter) and the statements that initialise and accumulate "crc" are
 * missing from this copy; restore them from the upstream file.
1013 */
1014static void
1016{
1017 pg_crc32c crc;
1018
1019 /* Calculate CRC */
1022 FIN_CRC32C(crc);
1023
1024 /* And simply compare it */
1025 if (!EQ_CRC32C(crc, ControlFile->crc))
1026 pg_fatal("unexpected control file CRC");
1027}
1028
1029/*
1030 * Verify control file contents in the buffer 'content', and copy it to
1031 * *ControlFile.
 *
 * 'size' must equal PG_CONTROL_FILE_SIZE, otherwise pg_fatal() is called.
 * Only sizeof(ControlFileData) bytes are copied out of the buffer.  The WAL
 * segment size is then validated; an invalid value is reported and the
 * program exits with status 1.
 *
 * NOTE(review): the first declarator line, the assignment of WalSegSz, the
 * condition guarding the error block and the final additional check are
 * missing from this copy.
1032 */
1033static void
1035 size_t size)
1036{
1037 if (size != PG_CONTROL_FILE_SIZE)
1038 pg_fatal("unexpected control file size %d, expected %d",
1039 (int) size, PG_CONTROL_FILE_SIZE);
1040
/* the file on disk is larger than the struct; copy only the struct part */
1041 memcpy(ControlFile, content, sizeof(ControlFileData));
1042
1043 /* set and validate WalSegSz */
1045
1047 {
/* ngettext() picks the singular or plural message form based on WalSegSz */
1048 pg_log_error(ngettext("invalid WAL segment size in control file (%d byte)",
1049 "invalid WAL segment size in control file (%d bytes)",
1050 WalSegSz),
1051 WalSegSz);
1052 pg_log_error_detail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
1053 exit(1);
1054 }
1055
1056 /* Additional checks on control file */
1058}
1059
1060/*
1061 * Get value of GUC parameter restore_command from the target cluster.
1062 *
1063 * This uses a logic based on "postgres -C" to get the value from the
1064 * cluster.
 *
 * Does nothing unless restore_wal is set.  Otherwise it locates the
 * "postgres" executable relative to argv0, runs it with -D on the target
 * data directory (plus "-c config_file=..." when a configuration file was
 * given) and "-C restore_command", and stores the line it prints in the
 * file-level variable restore_command.  A missing or mismatching postgres
 * binary, a failed read, or an empty value is fatal.
 *
 * NOTE(review): the declarator line (getRestoreCommand(const char *argv0),
 * per the forward declaration), the version-string argument of
 * find_other_exec(), original line 1123 and the argument of the final
 * pg_log_debug() call are missing from this copy.
1065 */
1066static void
1068{
1069 int rc;
1070 char postgres_exec_path[MAXPGPATH];
1071 PQExpBuffer postgres_cmd;
1072
1073 if (!restore_wal)
1074 return;
1075
1076 /* find postgres executable */
1077 rc = find_other_exec(argv0, "postgres",
1079 postgres_exec_path);
1080
1081 if (rc < 0)
1082 {
1083 char full_path[MAXPGPATH];
1084
/* fall back to the bare program name if our own path cannot be resolved */
1085 if (find_my_exec(argv0, full_path) < 0)
1086 strlcpy(full_path, progname, sizeof(full_path));
1087
/* -1 is reported as "not found"; any other negative value as a version mismatch */
1088 if (rc == -1)
1089 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1090 "postgres", progname, full_path);
1091 else
1092 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1093 "postgres", full_path, progname);
1094 }
1095
1096 /*
1097 * Build a command able to retrieve the value of GUC parameter
1098 * restore_command, if set.
1099 */
1100 postgres_cmd = createPQExpBuffer();
1101
1102 /* path to postgres, properly quoted */
1103 appendShellString(postgres_cmd, postgres_exec_path);
1104
1105 /* add -D switch, with properly quoted data directory */
1106 appendPQExpBufferStr(postgres_cmd, " -D ");
1107 appendShellString(postgres_cmd, datadir_target);
1108
1109 /* add custom configuration file only if requested */
1110 if (config_file != NULL)
1111 {
1112 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1113 appendShellString(postgres_cmd, config_file);
1114 }
1115
1116 /* add -C switch, for restore_command */
1117 appendPQExpBufferStr(postgres_cmd, " -C restore_command");
1118
1119 restore_command = pipe_read_line(postgres_cmd->data);
1120 if (restore_command == NULL)
1121 pg_fatal("could not read \"restore_command\" from target cluster");
1122
1124
/* an empty value means the GUC is not configured in the target */
1125 if (strcmp(restore_command, "") == 0)
1126 pg_fatal("\"restore_command\" is not set in the target cluster");
1127
1128 pg_log_debug("using for rewind \"restore_command = \'%s\'\"",
1130
1131 destroyPQExpBuffer(postgres_cmd);
1132}
1133
1134
1135/*
1136 * Ensure clean shutdown of target instance by launching single-user mode
1137 * postgres to do crash recovery.
 *
 * The "postgres" executable is located relative to argv0; a missing or
 * mismatching binary is fatal.  In dry-run mode the function returns after
 * that lookup without running anything.  Otherwise the command
 * "<postgres> --single -F -D <target> [-c config_file=<file>] template1"
 * is run through system() with standard input redirected from DEVNULL; a
 * non-zero result is reported together with the command line and the
 * program exits with status 1.
 *
 * NOTE(review): the declarator line (ensureCleanShutdown(const char *argv0),
 * per the forward declaration) and the version-string argument of
 * find_other_exec() are missing from this copy.
1138 */
1139static void
1141{
1142 int ret;
1143 char exec_path[MAXPGPATH];
1144 PQExpBuffer postgres_cmd;
1145
1146 /* locate postgres binary */
1147 if ((ret = find_other_exec(argv0, "postgres",
1149 exec_path)) < 0)
1150 {
1151 char full_path[MAXPGPATH];
1152
/* fall back to the bare program name if our own path cannot be resolved */
1153 if (find_my_exec(argv0, full_path) < 0)
1154 strlcpy(full_path, progname, sizeof(full_path));
1155
1156 if (ret == -1)
1157 pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"",
1158 "postgres", progname, full_path);
1159 else
1160 pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s",
1161 "postgres", full_path, progname);
1162 }
1163
1164 pg_log_info("executing \"%s\" for target server to complete crash recovery",
1165 exec_path);
1166
1167 /*
1168 * Skip processing if requested, but only after ensuring presence of
1169 * postgres.
1170 */
1171 if (dry_run)
1172 return;
1173
1174 /*
1175 * Finally run postgres in single-user mode. There is no need to use
1176 * fsync here. This makes the recovery faster, and the target data folder
1177 * is synced at the end anyway.
1178 */
1179 postgres_cmd = createPQExpBuffer();
1180
1181 /* path to postgres, properly quoted */
1182 appendShellString(postgres_cmd, exec_path);
1183
1184 /* add set of options with properly quoted data directory */
1185 appendPQExpBufferStr(postgres_cmd, " --single -F -D ");
1186 appendShellString(postgres_cmd, datadir_target);
1187
1188 /* add custom configuration file only if requested */
1189 if (config_file != NULL)
1190 {
1191 appendPQExpBufferStr(postgres_cmd, " -c config_file=");
1192 appendShellString(postgres_cmd, config_file);
1193 }
1194
1195 /* finish with the database name, and a properly quoted redirection */
1196 appendPQExpBufferStr(postgres_cmd, " template1 < ");
1197 appendShellString(postgres_cmd, DEVNULL);
1198
/* flush all of our own output streams before handing over to the child process */
1199 fflush(NULL);
1200 if (system(postgres_cmd->data) != 0)
1201 {
1202 pg_log_error("postgres single-user mode in target cluster failed");
1203 pg_log_error_detail("Command was: %s", postgres_cmd->data);
1204 exit(1);
1205 }
1206
1207 destroyPQExpBuffer(postgres_cmd);
1208}
1209
1210static void
1212{
1213 if (conn != NULL)
1214 PQfinish(conn);
1215}
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
uint32 BlockNumber
Definition: block.h:31
#define PG_DATA_CHECKSUM_VERSION
Definition: bufpage.h:206
#define Min(x, y)
Definition: c.h:1008
#define ngettext(s, p, n)
Definition: c.h:1171
#define Max(x, y)
Definition: c.h:1002
#define PG_TEXTDOMAIN(domain)
Definition: c.h:1204
#define UINT64_FORMAT
Definition: c.h:562
uint64_t uint64
Definition: c.h:544
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int find_my_exec(const char *argv0, char *retpath)
Definition: exec.c:161
char * pipe_read_line(char *cmd)
Definition: exec.c:353
void set_pglocale_pgservice(const char *argv0, const char *app)
Definition: exec.c:430
int find_other_exec(const char *argv0, const char *target, const char *versionstr, char *retpath)
Definition: exec.c:311
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
bool datapagemap_next(datapagemap_iterator_t *iter, BlockNumber *blkno)
Definition: datapagemap.c:87
datapagemap_iterator_t * datapagemap_iterate(datapagemap_t *map)
Definition: datapagemap.c:75
#define _(x)
Definition: elog.c:91
PGconn * PQconnectdb(const char *conninfo)
Definition: fe-connect.c:825
ConnStatusType PQstatus(const PGconn *conn)
Definition: fe-connect.c:7641
void PQfinish(PGconn *conn)
Definition: fe-connect.c:5316
char * PQerrorMessage(const PGconn *conn)
Definition: fe-connect.c:7704
void * pg_malloc(size_t size)
Definition: fe_memutils.c:47
char * pg_strdup(const char *in)
Definition: fe_memutils.c:85
void pg_free(void *ptr)
Definition: fe_memutils.c:105
void traverse_datadir(const char *datadir, process_file_callback_t callback)
Definition: file_ops.c:362
char * slurpFile(const char *datadir, const char *path, size_t *filesize)
Definition: file_ops.c:314
void close_target_file(void)
Definition: file_ops.c:75
void truncate_target_file(const char *path, off_t newsize)
Definition: file_ops.c:206
void remove_target(file_entry_t *entry)
Definition: file_ops.c:130
void sync_target_dir(void)
Definition: file_ops.c:294
void create_target(file_entry_t *entry)
Definition: file_ops.c:156
void open_target_file(const char *path, bool trunc)
Definition: file_ops.c:47
void write_target_range(char *buf, off_t begin, size_t size)
Definition: file_ops.c:88
int pg_mode_mask
Definition: file_perm.c:25
bool GetDataDirectoryCreatePerm(const char *dataDir)
DataDirSyncMethod
Definition: file_utils.h:28
@ DATA_DIR_SYNC_METHOD_FSYNC
Definition: file_utils.h:29
void filehash_init(void)
Definition: filemap.c:197
void process_source_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:280
void print_filemap(filemap_t *filemap)
Definition: filemap.c:541
void keepwal_init(void)
Definition: filemap.c:243
void process_target_file(const char *path, file_type_t type, size_t size, const char *link_target)
Definition: filemap.c:316
void calculate_totals(filemap_t *filemap)
Definition: filemap.c:500
filemap_t * decide_file_actions(XLogSegNo last_common_segno)
Definition: filemap.c:924
@ FILE_ACTION_REMOVE
Definition: filemap.h:28
@ FILE_ACTION_COPY
Definition: filemap.h:22
@ FILE_ACTION_NONE
Definition: filemap.h:25
@ FILE_ACTION_COPY_TAIL
Definition: filemap.h:23
@ FILE_ACTION_UNDECIDED
Definition: filemap.h:19
@ FILE_ACTION_TRUNCATE
Definition: filemap.h:27
@ FILE_ACTION_CREATE
Definition: filemap.h:21
int getopt_long(int argc, char *const argv[], const char *optstring, const struct option *longopts, int *longindex)
Definition: getopt_long.c:60
#define no_argument
Definition: getopt_long.h:25
#define required_argument
Definition: getopt_long.h:26
Assert(PointerIsAligned(start, uint64))
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
@ CONNECTION_BAD
Definition: libpq-fe.h:85
rewind_source * init_libpq_source(PGconn *conn)
Definition: libpq_source.c:81
rewind_source * init_local_source(const char *datadir)
Definition: local_source.c:38
static struct pg_tm tm
Definition: localtime.c:104
void pg_logging_increase_verbosity(void)
Definition: logging.c:185
void pg_logging_init(const char *argv0)
Definition: logging.c:83
#define pg_log_error(...)
Definition: logging.h:106
#define pg_log_error_hint(...)
Definition: logging.h:112
#define pg_log_info(...)
Definition: logging.h:124
#define pg_log_error_detail(...)
Definition: logging.h:109
#define pg_log_debug(...)
Definition: logging.h:133
void pfree(void *pointer)
Definition: mcxt.c:1594
bool parse_sync_method(const char *optarg, DataDirSyncMethod *sync_method)
Definition: option_utils.c:90
void extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr endpoint, const char *restoreCommand)
Definition: parsexlog.c:66
void findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli, XLogRecPtr *lastchkptredo, const char *restoreCommand)
Definition: parsexlog.c:168
XLogRecPtr readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, const char *restoreCommand)
Definition: parsexlog.c:124
static pg_time_t last_progress_report
Definition: pg_amcheck.c:147
#define pg_fatal(...)
static bool writerecoveryconf
#define MAXPGPATH
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:258
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
const void size_t len
return crc
static char * argv0
Definition: pg_ctl.c:94
static char * exec_path
Definition: pg_ctl.c:89
PGDLLIMPORT int optind
Definition: getopt.c:51
PGDLLIMPORT char * optarg
Definition: getopt.c:53
static ControlFileData ControlFile_source
Definition: pg_rewind.c:60
static void createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpointloc)
Definition: pg_rewind.c:972
static void usage(const char *progname)
Definition: pg_rewind.c:92
static void sanityChecks(void)
Definition: pg_rewind.c:744
static char * datadir_source
Definition: pg_rewind.c:68
static void findCommonAncestorTimeline(TimeLineHistoryEntry *a_history, int a_nentries, TimeLineHistoryEntry *b_history, int b_nentries, XLogRecPtr *recptr, int *tliIndex)
Definition: pg_rewind.c:930
static ControlFileData ControlFile_source_after
Definition: pg_rewind.c:61
int WalSegSz
Definition: pg_rewind.c:64
static char * restore_command
Definition: pg_rewind.c:70
static bool debug
Definition: pg_rewind.c:73
int main(int argc, char **argv)
Definition: pg_rewind.c:121
static XLogRecPtr MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
Definition: pg_rewind.c:853
static void ensureCleanShutdown(const char *argv0)
Definition: pg_rewind.c:1140
TimeLineHistoryEntry * targetHistory
Definition: pg_rewind.c:81
static rewind_source * source
Definition: pg_rewind.c:89
static ControlFileData ControlFile_target
Definition: pg_rewind.c:59
void progress_report(bool finished)
Definition: pg_rewind.c:802
static TimeLineHistoryEntry * getTimelineHistory(TimeLineID tli, bool is_source, int *nentries)
Definition: pg_rewind.c:867
static void digestControlFile(ControlFileData *ControlFile, const char *content, size_t size)
Definition: pg_rewind.c:1034
static char * connstr_source
Definition: pg_rewind.c:69
static void checkControlFile(ControlFileData *ControlFile)
Definition: pg_rewind.c:1015
static void getRestoreCommand(const char *argv0)
Definition: pg_rewind.c:1067
char * datadir_target
Definition: pg_rewind.c:67
bool do_sync
Definition: pg_rewind.c:76
static bool restore_wal
Definition: pg_rewind.c:77
uint64 fetch_done
Definition: pg_rewind.c:86
int targetNentries
Definition: pg_rewind.c:82
uint64 fetch_size
Definition: pg_rewind.c:85
static char * config_file
Definition: pg_rewind.c:71
bool dry_run
Definition: pg_rewind.c:75
DataDirSyncMethod sync_method
Definition: pg_rewind.c:78
bool showprogress
Definition: pg_rewind.c:74
static const char * progname
Definition: pg_rewind.c:63
static void perform_rewind(filemap_t *filemap, rewind_source *source, XLogRecPtr chkptrec, TimeLineID chkpttli, XLogRecPtr chkptredo)
Definition: pg_rewind.c:564
static void disconnect_atexit(void)
Definition: pg_rewind.c:1211
static PGconn * conn
Definition: pg_rewind.c:88
TimeLineHistoryEntry * rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries)
Definition: timeline.c:28
static char * buf
Definition: pg_test_fsync.c:72
int64 pg_time_t
Definition: pgtime.h:23
#define snprintf
Definition: port.h:239
#define DEVNULL
Definition: port.h:161
#define PG_BACKEND_VERSIONSTR
Definition: port.h:144
const char * get_progname(const char *argv0)
Definition: path.c:652
#define printf(...)
Definition: port.h:245
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
PQExpBuffer createPQExpBuffer(void)
Definition: pqexpbuffer.c:72
void destroyPQExpBuffer(PQExpBuffer str)
Definition: pqexpbuffer.c:114
void appendPQExpBufferStr(PQExpBuffer str, const char *data)
Definition: pqexpbuffer.c:367
char * c
void WriteRecoveryConfig(PGconn *pgconn, const char *target_dir, PQExpBuffer contents)
Definition: recovery_gen.c:125
PQExpBuffer GenerateRecoveryConfig(PGconn *pgconn, const char *replication_slot, char *dbname)
Definition: recovery_gen.c:28
char * GetDbnameFromConnectionOptions(const char *connstr)
Definition: recovery_gen.c:204
void get_restricted_token(void)
int pg_strip_crlf(char *str)
Definition: string.c:154
void appendShellString(PQExpBuffer buf, const char *str)
Definition: string_utils.c:582
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
XLogRecPtr redo
Definition: pg_control.h:37
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:213
CheckPoint checkPointCopy
Definition: pg_control.h:135
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
uint32 data_checksum_version
Definition: pg_control.h:224
XLogRecPtr checkPoint
Definition: pg_control.h:133
uint64 system_identifier
Definition: pg_control.h:110
uint32 catalog_version_no
Definition: pg_control.h:126
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
pg_crc32c crc
Definition: pg_control.h:240
XLogRecPtr begin
Definition: timeline.h:28
TimeLineID tli
Definition: timeline.h:27
XLogRecPtr end
Definition: timeline.h:29
int bitmapsize
Definition: datapagemap.h:17
Definition: filemap.h:58
datapagemap_t target_pages_to_overwrite
Definition: filemap.h:76
const char * path
Definition: filemap.h:61
size_t source_size
Definition: filemap.h:83
size_t target_size
Definition: filemap.h:69
file_action_t action
Definition: filemap.h:89
file_entry_t * entries[FLEXIBLE_ARRAY_MEMBER]
Definition: filemap.h:104
int nentries
Definition: filemap.h:103
uint64 total_size
Definition: filemap.h:100
uint64 fetch_size
Definition: filemap.h:101
void(* queue_fetch_file)(struct rewind_source *, const char *path, size_t len)
Definition: rewind_source.h:60
void(* traverse_files)(struct rewind_source *, process_file_callback_t callback)
Definition: rewind_source.h:29
void(* finish_fetch)(struct rewind_source *)
Definition: rewind_source.h:66
XLogRecPtr(* get_current_wal_insert_lsn)(struct rewind_source *)
Definition: rewind_source.h:71
void(* queue_fetch_range)(struct rewind_source *, const char *path, off_t offset, size_t len)
Definition: rewind_source.h:47
char *(* fetch_file)(struct rewind_source *, const char *path, size_t *filesize)
Definition: rewind_source.h:37
void(* destroy)(struct rewind_source *)
Definition: rewind_source.h:76
static ControlFileData * ControlFile
Definition: xlog.c:575
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96
#define XLOG_CONTROL_FILE
#define MAXFNAMELEN
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
static void TLHistoryFilePath(char *path, TimeLineID tli)
#define XLogRecPtrIsValid(r)
Definition: xlogdefs.h:29
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:47
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28
uint32 TimeLineID
Definition: xlogdefs.h:63
uint64 XLogSegNo
Definition: xlogdefs.h:52