PostgreSQL Source Code git master
xlog.c File Reference
#include "postgres.h"
#include <ctype.h>
#include <math.h>
#include <time.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#include "access/clog.h"
#include "access/commit_ts.h"
#include "access/heaptoast.h"
#include "access/multixact.h"
#include "access/rewriteheap.h"
#include "access/subtrans.h"
#include "access/timeline.h"
#include "access/transam.h"
#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog_internal.h"
#include "access/xlogarchive.h"
#include "access/xloginsert.h"
#include "access/xlogreader.h"
#include "access/xlogrecovery.h"
#include "access/xlogutils.h"
#include "access/xlogwait.h"
#include "backup/basebackup.h"
#include "catalog/catversion.h"
#include "catalog/pg_control.h"
#include "catalog/pg_database.h"
#include "common/controldata_utils.h"
#include "common/file_utils.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "port/atomics.h"
#include "postmaster/bgwriter.h"
#include "postmaster/startup.h"
#include "postmaster/walsummarizer.h"
#include "postmaster/walwriter.h"
#include "replication/origin.h"
#include "replication/slot.h"
#include "replication/snapbuild.h"
#include "replication/walreceiver.h"
#include "replication/walsender.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "storage/large_object.h"
#include "storage/latch.h"
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/reinit.h"
#include "storage/spin.h"
#include "storage/sync.h"
#include "utils/guc_hooks.h"
#include "utils/guc_tables.h"
#include "utils/injection_point.h"
#include "utils/pgstat_internal.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
#include "utils/varlena.h"
Include dependency graph for xlog.c:

Go to the source code of this file.

Data Structures

struct  XLogwrtRqst
 
struct  XLogwrtResult
 
struct  WALInsertLock
 
union  WALInsertLockPadded
 
struct  XLogCtlInsert
 
struct  XLogCtlData
 

Macros

#define BootstrapTimeLineID   1
 
#define NUM_XLOGINSERT_LOCKS   8
 
#define INSERT_FREESPACE(endptr)    (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))
 
#define NextBufIdx(idx)    (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
 
#define XLogRecPtrToBufIdx(recptr)    (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))
 
#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)
 
#define ConvertToXSegs(x, segsize)   XLogMBVarToSegs((x), (segsize))
 
#define RefreshXLogWriteResult(_target)
 

Typedefs

typedef struct XLogwrtRqst XLogwrtRqst
 
typedef struct XLogwrtResult XLogwrtResult
 
typedef union WALInsertLockPadded WALInsertLockPadded
 
typedef struct XLogCtlInsert XLogCtlInsert
 
typedef struct XLogCtlData XLogCtlData
 

Enumerations

enum  WalInsertClass { WALINSERT_NORMAL , WALINSERT_SPECIAL_SWITCH , WALINSERT_SPECIAL_CHECKPOINT }
 

Functions

static void CleanupAfterArchiveRecovery (TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
 
static void CheckRequiredParameterValues (void)
 
static void XLogReportParameters (void)
 
static int LocalSetXLogInsertAllowed (void)
 
static void CreateEndOfRecoveryRecord (void)
 
static XLogRecPtr CreateOverwriteContrecordRecord (XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
 
static void CheckPointGuts (XLogRecPtr checkPointRedo, int flags)
 
static void KeepLogSeg (XLogRecPtr recptr, XLogSegNo *logSegNo)
 
static XLogRecPtr XLogGetReplicationSlotMinimumLSN (void)
 
static void AdvanceXLInsertBuffer (XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 
static void XLogWrite (XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 
static bool InstallXLogFileSegment (XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
 
static void XLogFileClose (void)
 
static void PreallocXlogFiles (XLogRecPtr endptr, TimeLineID tli)
 
static void RemoveTempXlogFiles (void)
 
static void RemoveOldXlogFiles (XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
 
static void RemoveXlogFile (const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
 
static void UpdateLastRemovedPtr (char *filename)
 
static void ValidateXLOGDirectoryStructure (void)
 
static void CleanupBackupHistory (void)
 
static void UpdateMinRecoveryPoint (XLogRecPtr lsn, bool force)
 
static bool PerformRecoveryXLogAction (void)
 
static void InitControlFile (uint64 sysidentifier, uint32 data_checksum_version)
 
static void WriteControlFile (void)
 
static void ReadControlFile (void)
 
static void UpdateControlFile (void)
 
static char * str_time (pg_time_t tnow, char *buf, size_t bufsize)
 
static int get_sync_bit (int method)
 
static void CopyXLogRecordToWAL (int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
 
static void ReserveXLogInsertLocation (int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static bool ReserveXLogSwitch (XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
 
static XLogRecPtr WaitXLogInsertionsToFinish (XLogRecPtr upto)
 
static char * GetXLogBuffer (XLogRecPtr ptr, TimeLineID tli)
 
static XLogRecPtr XLogBytePosToRecPtr (uint64 bytepos)
 
static XLogRecPtr XLogBytePosToEndRecPtr (uint64 bytepos)
 
static uint64 XLogRecPtrToBytePos (XLogRecPtr ptr)
 
static void WALInsertLockAcquire (void)
 
static void WALInsertLockAcquireExclusive (void)
 
static void WALInsertLockRelease (void)
 
static void WALInsertLockUpdateInsertingAt (XLogRecPtr insertingAt)
 
XLogRecPtr XLogInsertRecord (XLogRecData *rdata, XLogRecPtr fpw_lsn, uint8 flags, int num_fpi, uint64 fpi_bytes, bool topxid_included)
 
Size WALReadFromBuffers (char *dstbuf, XLogRecPtr startptr, Size count, TimeLineID tli)
 
static void CalculateCheckpointSegments (void)
 
void assign_max_wal_size (int newval, void *extra)
 
void assign_checkpoint_completion_target (double newval, void *extra)
 
bool check_wal_segment_size (int *newval, void **extra, GucSource source)
 
static XLogSegNo XLOGfileslop (XLogRecPtr lastredoptr)
 
bool XLogCheckpointNeeded (XLogSegNo new_segno)
 
void XLogSetAsyncXactLSN (XLogRecPtr asyncXactLSN)
 
void XLogSetReplicationSlotMinimumLSN (XLogRecPtr lsn)
 
void XLogFlush (XLogRecPtr record)
 
bool XLogBackgroundFlush (void)
 
bool XLogNeedsFlush (XLogRecPtr record)
 
static int XLogFileInitInternal (XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
 
int XLogFileInit (XLogSegNo logsegno, TimeLineID logtli)
 
static void XLogFileCopy (TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
 
int XLogFileOpen (XLogSegNo segno, TimeLineID tli)
 
void CheckXLogRemoved (XLogSegNo segno, TimeLineID tli)
 
XLogSegNo XLogGetLastRemovedSegno (void)
 
XLogSegNo XLogGetOldestSegno (TimeLineID tli)
 
void RemoveNonParentXlogFiles (XLogRecPtr switchpoint, TimeLineID newTLI)
 
uint64 GetSystemIdentifier (void)
 
char * GetMockAuthenticationNonce (void)
 
bool DataChecksumsEnabled (void)
 
bool GetDefaultCharSignedness (void)
 
XLogRecPtr GetFakeLSNForUnloggedRel (void)
 
static int XLOGChooseNumBuffers (void)
 
bool check_wal_buffers (int *newval, void **extra, GucSource source)
 
bool check_wal_consistency_checking (char **newval, void **extra, GucSource source)
 
void assign_wal_consistency_checking (const char *newval, void *extra)
 
void InitializeWalConsistencyChecking (void)
 
const char * show_archive_command (void)
 
const char * show_in_hot_standby (void)
 
void LocalProcessControlFile (bool reset)
 
WalLevel GetActiveWalLevelOnStandby (void)
 
Size XLOGShmemSize (void)
 
void XLOGShmemInit (void)
 
void BootStrapXLOG (uint32 data_checksum_version)
 
static void XLogInitNewTimeline (TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
 
void StartupXLOG (void)
 
void SwitchIntoArchiveRecovery (XLogRecPtr EndRecPtr, TimeLineID replayTLI)
 
void ReachedEndOfBackup (XLogRecPtr EndRecPtr, TimeLineID tli)
 
bool RecoveryInProgress (void)
 
RecoveryState GetRecoveryState (void)
 
bool XLogInsertAllowed (void)
 
XLogRecPtr GetRedoRecPtr (void)
 
void GetFullPageWriteInfo (XLogRecPtr *RedoRecPtr_p, bool *doPageWrites_p)
 
XLogRecPtr GetInsertRecPtr (void)
 
XLogRecPtr GetFlushRecPtr (TimeLineID *insertTLI)
 
TimeLineID GetWALInsertionTimeLine (void)
 
TimeLineID GetWALInsertionTimeLineIfSet (void)
 
XLogRecPtr GetLastImportantRecPtr (void)
 
pg_time_t GetLastSegSwitchData (XLogRecPtr *lastSwitchLSN)
 
void ShutdownXLOG (int code, Datum arg)
 
static void LogCheckpointStart (int flags, bool restartpoint)
 
static void LogCheckpointEnd (bool restartpoint)
 
static void UpdateCheckPointDistanceEstimate (uint64 nbytes)
 
static void update_checkpoint_display (int flags, bool restartpoint, bool reset)
 
bool CreateCheckPoint (int flags)
 
static void RecoveryRestartPoint (const CheckPoint *checkPoint, XLogReaderState *record)
 
bool CreateRestartPoint (int flags)
 
WALAvailability GetWALAvailability (XLogRecPtr targetLSN)
 
void XLogPutNextOid (Oid nextOid)
 
XLogRecPtr RequestXLogSwitch (bool mark_unimportant)
 
XLogRecPtr XLogRestorePoint (const char *rpName)
 
void UpdateFullPageWrites (void)
 
void xlog_redo (XLogReaderState *record)
 
void assign_wal_sync_method (int new_wal_sync_method, void *extra)
 
void issue_xlog_fsync (int fd, XLogSegNo segno, TimeLineID tli)
 
void do_pg_backup_start (const char *backupidstr, bool fast, List **tablespaces, BackupState *state, StringInfo tblspcmapfile)
 
SessionBackupState get_backup_status (void)
 
void do_pg_backup_stop (BackupState *state, bool waitforarchive)
 
void do_pg_abort_backup (int code, Datum arg)
 
void register_persistent_abort_backup_handler (void)
 
XLogRecPtr GetXLogInsertRecPtr (void)
 
XLogRecPtr GetXLogWriteRecPtr (void)
 
void GetOldestRestartPoint (XLogRecPtr *oldrecptr, TimeLineID *oldtli)
 
void XLogShutdownWalRcv (void)
 
void SetInstallXLogFileSegmentActive (void)
 
void ResetInstallXLogFileSegmentActive (void)
 
bool IsInstallXLogFileSegmentActive (void)
 
void SetWalWriterSleeping (bool sleeping)
 

Variables

int max_wal_size_mb = 1024
 
int min_wal_size_mb = 80
 
int wal_keep_size_mb = 0
 
int XLOGbuffers = -1
 
int XLogArchiveTimeout = 0
 
int XLogArchiveMode = ARCHIVE_MODE_OFF
 
char * XLogArchiveCommand = NULL
 
bool EnableHotStandby = false
 
bool fullPageWrites = true
 
bool wal_log_hints = false
 
int wal_compression = WAL_COMPRESSION_NONE
 
char * wal_consistency_checking_string = NULL
 
bool * wal_consistency_checking = NULL
 
bool wal_init_zero = true
 
bool wal_recycle = true
 
bool log_checkpoints = true
 
int wal_sync_method = DEFAULT_WAL_SYNC_METHOD
 
int wal_level = WAL_LEVEL_REPLICA
 
int CommitDelay = 0
 
int CommitSiblings = 5
 
int wal_retrieve_retry_interval = 5000
 
int max_slot_wal_keep_size_mb = -1
 
int wal_decode_buffer_size = 512 * 1024
 
bool track_wal_io_timing = false
 
int wal_segment_size = DEFAULT_XLOG_SEG_SIZE
 
int CheckPointSegments
 
static double CheckPointDistanceEstimate = 0
 
static double PrevCheckPointDistance = 0
 
static bool check_wal_consistency_checking_deferred = false
 
const struct config_enum_entry wal_sync_method_options []
 
const struct config_enum_entry archive_mode_options []
 
CheckpointStatsData CheckpointStats
 
static bool lastFullPageWrites
 
static bool LocalRecoveryInProgress = true
 
static int LocalXLogInsertAllowed = -1
 
XLogRecPtr ProcLastRecPtr = InvalidXLogRecPtr
 
XLogRecPtr XactLastRecEnd = InvalidXLogRecPtr
 
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr
 
static XLogRecPtr RedoRecPtr
 
static bool doPageWrites
 
static SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
 
static XLogCtlData * XLogCtl = NULL
 
static WALInsertLockPadded * WALInsertLocks = NULL
 
static ControlFileData * ControlFile = NULL
 
static int UsableBytesInSegment
 
static XLogwrtResult LogwrtResult = {0, 0}
 
static int openLogFile = -1
 
static XLogSegNo openLogSegNo = 0
 
static TimeLineID openLogTLI = 0
 
static XLogRecPtr LocalMinRecoveryPoint
 
static TimeLineID LocalMinRecoveryPointTLI
 
static bool updateMinRecoveryPoint = true
 
static int MyLockNo = 0
 
static bool holdingAllLocks = false
 

Macro Definition Documentation

◆ BootstrapTimeLineID

#define BootstrapTimeLineID   1

Definition at line 113 of file xlog.c.

◆ ConvertToXSegs

#define ConvertToXSegs (   x,
  segsize 
)    XLogMBVarToSegs((x), (segsize))

Definition at line 604 of file xlog.c.

◆ INSERT_FREESPACE

#define INSERT_FREESPACE (   endptr)     (((endptr) % XLOG_BLCKSZ == 0) ? 0 : (XLOG_BLCKSZ - (endptr) % XLOG_BLCKSZ))

Definition at line 581 of file xlog.c.

◆ NextBufIdx

#define NextBufIdx (   idx)     (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))

Definition at line 585 of file xlog.c.

◆ NUM_XLOGINSERT_LOCKS

#define NUM_XLOGINSERT_LOCKS   8

Definition at line 152 of file xlog.c.

◆ RefreshXLogWriteResult

#define RefreshXLogWriteResult (   _target)
Value:
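do { \
_target.Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult); \
pg_read_barrier(); \
_target.Write = pg_atomic_read_u64(&XLogCtl->logWriteResult); \
} while (0)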
static uint64 pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:465
pg_atomic_uint64 logWriteResult
Definition: xlog.c:473
pg_atomic_uint64 logFlushResult
Definition: xlog.c:474
static XLogCtlData * XLogCtl
Definition: xlog.c:567

Definition at line 621 of file xlog.c.

◆ UsableBytesInPage

#define UsableBytesInPage   (XLOG_BLCKSZ - SizeOfXLogShortPHD)

Definition at line 598 of file xlog.c.

◆ XLogRecPtrToBufIdx

#define XLogRecPtrToBufIdx (   recptr)     (((recptr) / XLOG_BLCKSZ) % (XLogCtl->XLogCacheBlck + 1))

Definition at line 592 of file xlog.c.

Typedef Documentation

◆ WALInsertLockPadded

typedef union WALInsertLockPadded WALInsertLockPadded

◆ XLogCtlData

typedef struct XLogCtlData XLogCtlData

◆ XLogCtlInsert

typedef struct XLogCtlInsert XLogCtlInsert

◆ XLogwrtResult

typedef struct XLogwrtResult XLogwrtResult

◆ XLogwrtRqst

typedef struct XLogwrtRqst XLogwrtRqst

Enumeration Type Documentation

◆ WalInsertClass

Enumerator
WALINSERT_NORMAL 
WALINSERT_SPECIAL_SWITCH 
WALINSERT_SPECIAL_CHECKPOINT 

Definition at line 560 of file xlog.c.

561{
WalInsertClass
Definition: xlog.c:561
@ WALINSERT_SPECIAL_SWITCH
Definition: xlog.c:563
@ WALINSERT_NORMAL
Definition: xlog.c:562
@ WALINSERT_SPECIAL_CHECKPOINT
Definition: xlog.c:564

Function Documentation

◆ AdvanceXLInsertBuffer()

static void AdvanceXLInsertBuffer ( XLogRecPtr  upto,
TimeLineID  tli,
bool  opportunistic 
)
static

Definition at line 1991 of file xlog.c.

1992{
1994 int nextidx;
1995 XLogRecPtr OldPageRqstPtr;
1996 XLogwrtRqst WriteRqst;
1997 XLogRecPtr NewPageEndPtr = InvalidXLogRecPtr;
1998 XLogRecPtr NewPageBeginPtr;
1999 XLogPageHeader NewPage;
2000 int npages pg_attribute_unused() = 0;
2001
2002 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2003
2004 /*
2005 * Now that we have the lock, check if someone initialized the page
2006 * already.
2007 */
2008 while (upto >= XLogCtl->InitializedUpTo || opportunistic)
2009 {
2011
2012 /*
2013 * Get ending-offset of the buffer page we need to replace (this may
2014 * be zero if the buffer hasn't been used yet). Fall through if it's
2015 * already written out.
2016 */
2017 OldPageRqstPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[nextidx]);
2018 if (LogwrtResult.Write < OldPageRqstPtr)
2019 {
2020 /*
2021 * Nope, got work to do. If we just want to pre-initialize as much
2022 * as we can without flushing, give up now.
2023 */
2024 if (opportunistic)
2025 break;
2026
2027 /* Advance shared memory write request position */
2029 if (XLogCtl->LogwrtRqst.Write < OldPageRqstPtr)
2030 XLogCtl->LogwrtRqst.Write = OldPageRqstPtr;
2032
2033 /*
2034 * Acquire an up-to-date LogwrtResult value and see if we still
2035 * need to write it or if someone else already did.
2036 */
2038 if (LogwrtResult.Write < OldPageRqstPtr)
2039 {
2040 /*
2041 * Must acquire write lock. Release WALBufMappingLock first,
2042 * to make sure that all insertions that we need to wait for
2043 * can finish (up to this same position). Otherwise we risk
2044 * deadlock.
2045 */
2046 LWLockRelease(WALBufMappingLock);
2047
2048 WaitXLogInsertionsToFinish(OldPageRqstPtr);
2049
2050 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
2051
2053 if (LogwrtResult.Write >= OldPageRqstPtr)
2054 {
2055 /* OK, someone wrote it already */
2056 LWLockRelease(WALWriteLock);
2057 }
2058 else
2059 {
2060 /* Have to write it ourselves */
2061 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START();
2062 WriteRqst.Write = OldPageRqstPtr;
2063 WriteRqst.Flush = 0;
2064 XLogWrite(WriteRqst, tli, false);
2065 LWLockRelease(WALWriteLock);
2067 TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
2068
2069 /*
2070 * Required for the flush of pending stats WAL data, per
2071 * update of pgWalUsage.
2072 */
2073 pgstat_report_fixed = true;
2074 }
2075 /* Re-acquire WALBufMappingLock and retry */
2076 LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
2077 continue;
2078 }
2079 }
2080
2081 /*
2082 * Now the next buffer slot is free and we can set it up to be the
2083 * next output page.
2084 */
2085 NewPageBeginPtr = XLogCtl->InitializedUpTo;
2086 NewPageEndPtr = NewPageBeginPtr + XLOG_BLCKSZ;
2087
2088 Assert(XLogRecPtrToBufIdx(NewPageBeginPtr) == nextidx);
2089
2090 NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * (Size) XLOG_BLCKSZ);
2091
2092 /*
2093 * Mark the xlblock with InvalidXLogRecPtr and issue a write barrier
2094 * before initializing. Otherwise, the old page may be partially
2095 * zeroed but look valid.
2096 */
2099
2100 /*
2101 * Be sure to re-zero the buffer so that bytes beyond what we've
2102 * written will look like zeroes and not valid XLOG records...
2103 */
2104 MemSet(NewPage, 0, XLOG_BLCKSZ);
2105
2106 /*
2107 * Fill the new page's header
2108 */
2109 NewPage->xlp_magic = XLOG_PAGE_MAGIC;
2110
2111 /* NewPage->xlp_info = 0; */ /* done by memset */
2112 NewPage->xlp_tli = tli;
2113 NewPage->xlp_pageaddr = NewPageBeginPtr;
2114
2115 /* NewPage->xlp_rem_len = 0; */ /* done by memset */
2116
2117 /*
2118 * If online backup is not in progress, mark the header to indicate
2119 * that WAL records beginning in this page have removable backup
2120 * blocks. This allows the WAL archiver to know whether it is safe to
2121 * compress archived WAL data by transforming full-block records into
2122 * the non-full-block format. It is sufficient to record this at the
2123 * page level because we force a page switch (in fact a segment
2124 * switch) when starting a backup, so the flag will be off before any
2125 * records can be written during the backup. At the end of a backup,
2126 * the last page will be marked as all unsafe when perhaps only part
2127 * is unsafe, but at worst the archiver would miss the opportunity to
2128 * compress a few records.
2129 */
2130 if (Insert->runningBackups == 0)
2131 NewPage->xlp_info |= XLP_BKP_REMOVABLE;
2132
2133 /*
2134 * If first page of an XLOG segment file, make it a long header.
2135 */
2136 if ((XLogSegmentOffset(NewPage->xlp_pageaddr, wal_segment_size)) == 0)
2137 {
2138 XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
2139
2140 NewLongPage->xlp_sysid = ControlFile->system_identifier;
2141 NewLongPage->xlp_seg_size = wal_segment_size;
2142 NewLongPage->xlp_xlog_blcksz = XLOG_BLCKSZ;
2143 NewPage->xlp_info |= XLP_LONG_HEADER;
2144 }
2145
2146 /*
2147 * Make sure the initialization of the page becomes visible to others
2148 * before the xlblocks update. GetXLogBuffer() reads xlblocks without
2149 * holding a lock.
2150 */
2152
2153 pg_atomic_write_u64(&XLogCtl->xlblocks[nextidx], NewPageEndPtr);
2154 XLogCtl->InitializedUpTo = NewPageEndPtr;
2155
2156 npages++;
2157 }
2158 LWLockRelease(WALBufMappingLock);
2159
2160#ifdef WAL_DEBUG
2161 if (XLOG_DEBUG && npages > 0)
2162 {
2163 elog(DEBUG1, "initialized %d pages, up to %X/%08X",
2164 npages, LSN_FORMAT_ARGS(NewPageEndPtr));
2165 }
2166#endif
2167}
static void pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:483
#define pg_write_barrier()
Definition: atomics.h:155
#define pg_attribute_unused()
Definition: c.h:137
#define MemSet(start, val, len)
Definition: c.h:1024
size_t Size
Definition: c.h:615
#define DEBUG1
Definition: elog.h:30
#define elog(elevel,...)
Definition: elog.h:226
static void Insert(File file)
Definition: fd.c:1314
Assert(PointerIsAligned(start, uint64))
WalUsage pgWalUsage
Definition: instrument.c:22
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
@ LW_EXCLUSIVE
Definition: lwlock.h:112
bool pgstat_report_fixed
Definition: pgstat.c:218
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
uint64 system_identifier
Definition: pg_control.h:110
int64 wal_buffers_full
Definition: instrument.h:57
XLogwrtRqst LogwrtRqst
Definition: xlog.c:457
slock_t info_lck
Definition: xlog.c:554
XLogRecPtr InitializedUpTo
Definition: xlog.c:486
char * pages
Definition: xlog.c:493
pg_atomic_uint64 * xlblocks
Definition: xlog.c:494
XLogCtlInsert Insert
Definition: xlog.c:454
TimeLineID xlp_tli
Definition: xlog_internal.h:40
XLogRecPtr xlp_pageaddr
Definition: xlog_internal.h:41
XLogRecPtr Write
Definition: xlog.c:329
XLogRecPtr Flush
Definition: xlog.c:324
XLogRecPtr Write
Definition: xlog.c:323
static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto)
Definition: xlog.c:1510
#define RefreshXLogWriteResult(_target)
Definition: xlog.c:621
int wal_segment_size
Definition: xlog.c:145
static XLogwrtResult LogwrtResult
Definition: xlog.c:613
#define XLogRecPtrToBufIdx(recptr)
Definition: xlog.c:592
static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
Definition: xlog.c:2307
static ControlFileData * ControlFile
Definition: xlog.c:575
XLogLongPageHeaderData * XLogLongPageHeader
Definition: xlog_internal.h:71
#define XLogSegmentOffset(xlogptr, wal_segsz_bytes)
XLogPageHeaderData * XLogPageHeader
Definition: xlog_internal.h:54
#define XLP_LONG_HEADER
Definition: xlog_internal.h:76
#define XLP_BKP_REMOVABLE
Definition: xlog_internal.h:78
#define XLOG_PAGE_MAGIC
Definition: xlog_internal.h:34
#define LSN_FORMAT_ARGS(lsn)
Definition: xlogdefs.h:47
uint64 XLogRecPtr
Definition: xlogdefs.h:21
#define InvalidXLogRecPtr
Definition: xlogdefs.h:28

References Assert(), ControlFile, DEBUG1, elog, XLogwrtRqst::Flush, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, XLogCtlData::Insert, Insert(), InvalidXLogRecPtr, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, XLogCtlData::pages, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_attribute_unused, pg_write_barrier, pgstat_report_fixed, pgWalUsage, RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, ControlFileData::system_identifier, WaitXLogInsertionsToFinish(), WalUsage::wal_buffers_full, wal_segment_size, XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, XLogSegmentOffset, XLogWrite(), XLP_BKP_REMOVABLE, XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, and XLogLongPageHeaderData::xlp_xlog_blcksz.

Referenced by GetXLogBuffer(), and XLogBackgroundFlush().

◆ assign_checkpoint_completion_target()

void assign_checkpoint_completion_target ( double  newval,
void *  extra 
)

Definition at line 2210 of file xlog.c.

2211{
2214}
double CheckPointCompletionTarget
Definition: checkpointer.c:159
#define newval
static void CalculateCheckpointSegments(void)
Definition: xlog.c:2174

References CalculateCheckpointSegments(), CheckPointCompletionTarget, and newval.

◆ assign_max_wal_size()

void assign_max_wal_size ( int  newval,
void *  extra 
)

Definition at line 2203 of file xlog.c.

2204{
2207}
int max_wal_size_mb
Definition: xlog.c:116

References CalculateCheckpointSegments(), max_wal_size_mb, and newval.

◆ assign_wal_consistency_checking()

void assign_wal_consistency_checking ( const char *  newval,
void *  extra 
)

Definition at line 4813 of file xlog.c.

4814{
4815 /*
4816 * If some checks were deferred, it's possible that the checks will fail
4817 * later during InitializeWalConsistencyChecking(). But in that case, the
4818 * postmaster will exit anyway, so it's safe to proceed with the
4819 * assignment.
4820 *
4821 * Any built-in resource managers specified are assigned immediately,
4822 * which affects WAL created before shared_preload_libraries are
4823 * processed. Any custom resource managers specified won't be assigned
4824 * until after shared_preload_libraries are processed, but that's OK
4825 * because WAL for a custom resource manager can't be written before the
4826 * module is loaded anyway.
4827 */
4829}
bool * wal_consistency_checking
Definition: xlog.c:128

References wal_consistency_checking.

◆ assign_wal_sync_method()

void assign_wal_sync_method ( int  new_wal_sync_method,
void *  extra 
)

Definition at line 8717 of file xlog.c.

8718{
8719 if (wal_sync_method != new_wal_sync_method)
8720 {
8721 /*
8722 * To ensure that no blocks escape unsynced, force an fsync on the
8723 * currently open log segment (if any). Also, if the open flag is
8724 * changing, close the log file so it will be reopened (with new flag
8725 * bit) at next use.
8726 */
8727 if (openLogFile >= 0)
8728 {
8729 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN);
8730 if (pg_fsync(openLogFile) != 0)
8731 {
8732 char xlogfname[MAXFNAMELEN];
8733 int save_errno;
8734
8735 save_errno = errno;
8738 errno = save_errno;
8739 ereport(PANIC,
8741 errmsg("could not fsync file \"%s\": %m", xlogfname)));
8742 }
8743
8745 if (get_sync_bit(wal_sync_method) != get_sync_bit(new_wal_sync_method))
8746 XLogFileClose();
8747 }
8748 }
8749}
int errcode_for_file_access(void)
Definition: elog.c:886
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define PANIC
Definition: elog.h:42
#define ereport(elevel,...)
Definition: elog.h:150
int pg_fsync(int fd)
Definition: fd.c:386
static void pgstat_report_wait_start(uint32 wait_event_info)
Definition: wait_event.h:69
static void pgstat_report_wait_end(void)
Definition: wait_event.h:85
static int openLogFile
Definition: xlog.c:636
static int get_sync_bit(int method)
Definition: xlog.c:8669
int wal_sync_method
Definition: xlog.c:132
static TimeLineID openLogTLI
Definition: xlog.c:638
static void XLogFileClose(void)
Definition: xlog.c:3658
static XLogSegNo openLogSegNo
Definition: xlog.c:637
#define MAXFNAMELEN
static void XLogFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)

References ereport, errcode_for_file_access(), errmsg(), get_sync_bit(), MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), wal_segment_size, wal_sync_method, XLogFileClose(), and XLogFileName().

◆ BootStrapXLOG()

void BootStrapXLOG ( uint32  data_checksum_version)

Definition at line 5089 of file xlog.c.

5090{
5091 CheckPoint checkPoint;
5092 char *buffer;
5093 XLogPageHeader page;
5094 XLogLongPageHeader longpage;
5095 XLogRecord *record;
5096 char *recptr;
5097 uint64 sysidentifier;
5098 struct timeval tv;
5099 pg_crc32c crc;
5100
5101 /* allow ordinary WAL segment creation, like StartupXLOG() would */
5103
5104 /*
5105 * Select a hopefully-unique system identifier code for this installation.
5106 * We use the result of gettimeofday(), including the fractional seconds
5107 * field, as being about as unique as we can easily get. (Think not to
5108 * use random(), since it hasn't been seeded and there's no portable way
5109 * to seed it other than the system clock value...) The upper half of the
5110 * uint64 value is just the tv_sec part, while the lower half contains the
5111 * tv_usec part (which must fit in 20 bits), plus 12 bits from our current
5112 * PID for a little extra uniqueness. A person knowing this encoding can
5113 * determine the initialization time of the installation, which could
5114 * perhaps be useful sometimes.
5115 */
5116 gettimeofday(&tv, NULL);
5117 sysidentifier = ((uint64) tv.tv_sec) << 32;
5118 sysidentifier |= ((uint64) tv.tv_usec) << 12;
5119 sysidentifier |= getpid() & 0xFFF;
5120
5121 /* page buffer must be aligned suitably for O_DIRECT */
5122 buffer = (char *) palloc(XLOG_BLCKSZ + XLOG_BLCKSZ);
5123 page = (XLogPageHeader) TYPEALIGN(XLOG_BLCKSZ, buffer);
5124 memset(page, 0, XLOG_BLCKSZ);
5125
5126 /*
5127 * Set up information for the initial checkpoint record
5128 *
5129 * The initial checkpoint record is written to the beginning of the WAL
5130 * segment with logid=0 logseg=1. The very first WAL segment, 0/0, is not
5131 * used, so that we can use 0/0 to mean "before any valid WAL segment".
5132 */
5136 checkPoint.fullPageWrites = fullPageWrites;
5137 checkPoint.wal_level = wal_level;
5138 checkPoint.nextXid =
5140 checkPoint.nextOid = FirstGenbkiObjectId;
5141 checkPoint.nextMulti = FirstMultiXactId;
5142 checkPoint.nextMultiOffset = 0;
5144 checkPoint.oldestXidDB = Template1DbOid;
5145 checkPoint.oldestMulti = FirstMultiXactId;
5146 checkPoint.oldestMultiDB = Template1DbOid;
5149 checkPoint.time = (pg_time_t) time(NULL);
5151
5152 TransamVariables->nextXid = checkPoint.nextXid;
5153 TransamVariables->nextOid = checkPoint.nextOid;
5155 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5156 AdvanceOldestClogXid(checkPoint.oldestXid);
5157 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5158 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5160
5161 /* Set up the XLOG page header */
5162 page->xlp_magic = XLOG_PAGE_MAGIC;
5163 page->xlp_info = XLP_LONG_HEADER;
5166 longpage = (XLogLongPageHeader) page;
5167 longpage->xlp_sysid = sysidentifier;
5168 longpage->xlp_seg_size = wal_segment_size;
5169 longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
5170
5171 /* Insert the initial checkpoint record */
5172 recptr = ((char *) page + SizeOfXLogLongPHD);
5173 record = (XLogRecord *) recptr;
5174 record->xl_prev = 0;
5175 record->xl_xid = InvalidTransactionId;
5176 record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
5178 record->xl_rmid = RM_XLOG_ID;
5179 recptr += SizeOfXLogRecord;
5180 /* fill the XLogRecordDataHeaderShort struct */
5181 *(recptr++) = (char) XLR_BLOCK_ID_DATA_SHORT;
5182 *(recptr++) = sizeof(checkPoint);
5183 memcpy(recptr, &checkPoint, sizeof(checkPoint));
5184 recptr += sizeof(checkPoint);
5185 Assert(recptr - (char *) record == record->xl_tot_len);
5186
5188 COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
5189 COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
5190 FIN_CRC32C(crc);
5191 record->xl_crc = crc;
5192
5193 /* Create first XLOG segment file */
5196
5197 /*
5198 * We needn't bother with Reserve/ReleaseExternalFD here, since we'll
5199 * close the file again in a moment.
5200 */
5201
5202 /* Write the first page with the initial record */
5203 errno = 0;
5204 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_WRITE);
5205 if (write(openLogFile, page, XLOG_BLCKSZ) != XLOG_BLCKSZ)
5206 {
5207 /* if write didn't set errno, assume problem is no disk space */
5208 if (errno == 0)
5209 errno = ENOSPC;
5210 ereport(PANIC,
5212 errmsg("could not write bootstrap write-ahead log file: %m")));
5213 }
5215
5216 pgstat_report_wait_start(WAIT_EVENT_WAL_BOOTSTRAP_SYNC);
5217 if (pg_fsync(openLogFile) != 0)
5218 ereport(PANIC,
5220 errmsg("could not fsync bootstrap write-ahead log file: %m")));
5222
5223 if (close(openLogFile) != 0)
5224 ereport(PANIC,
5226 errmsg("could not close bootstrap write-ahead log file: %m")));
5227
5228 openLogFile = -1;
5229
5230 /* Now create pg_control */
5231 InitControlFile(sysidentifier, data_checksum_version);
5232 ControlFile->time = checkPoint.time;
5233 ControlFile->checkPoint = checkPoint.redo;
5234 ControlFile->checkPointCopy = checkPoint;
5235
5236 /* some additional ControlFile fields are set in WriteControlFile() */
5238
5239 /* Bootstrap the commit log, too */
5240 BootStrapCLOG();
5244
5245 pfree(buffer);
5246
5247 /*
5248 * Force control file to be read - in contrast to normal processing we'd
5249 * otherwise never run the checks and GUC related initializations therein.
5250 */
5252}
#define TYPEALIGN(ALIGNVAL, LEN)
Definition: c.h:808
uint64_t uint64
Definition: c.h:544
void BootStrapCLOG(void)
Definition: clog.c:832
void BootStrapCommitTs(void)
Definition: commit_ts.c:594
void SetCommitTsLimit(TransactionId oldestXact, TransactionId newestXact)
Definition: commit_ts.c:887
#define close(a)
Definition: win32.h:12
#define write(a, b, c)
Definition: win32.h:14
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
void MultiXactSetNextMXact(MultiXactId nextMulti, MultiXactOffset nextMultiOffset)
Definition: multixact.c:2258
void SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid, bool is_startup)
Definition: multixact.c:2292
void BootStrapMultiXact(void)
Definition: multixact.c:2025
#define FirstMultiXactId
Definition: multixact.h:26
#define XLOG_CHECKPOINT_SHUTDOWN
Definition: pg_control.h:68
uint32 pg_crc32c
Definition: pg_crc32c.h:38
#define COMP_CRC32C(crc, data, len)
Definition: pg_crc32c.h:153
#define INIT_CRC32C(crc)
Definition: pg_crc32c.h:41
#define FIN_CRC32C(crc)
Definition: pg_crc32c.h:158
return crc
int64 pg_time_t
Definition: pgtime.h:23
Oid oldestMultiDB
Definition: pg_control.h:51
MultiXactId oldestMulti
Definition: pg_control.h:50
MultiXactOffset nextMultiOffset
Definition: pg_control.h:47
TransactionId newestCommitTsXid
Definition: pg_control.h:55
TransactionId oldestXid
Definition: pg_control.h:48
TimeLineID PrevTimeLineID
Definition: pg_control.h:40
TimeLineID ThisTimeLineID
Definition: pg_control.h:39
Oid nextOid
Definition: pg_control.h:45
TransactionId oldestActiveXid
Definition: pg_control.h:64
bool fullPageWrites
Definition: pg_control.h:42
MultiXactId nextMulti
Definition: pg_control.h:46
FullTransactionId nextXid
Definition: pg_control.h:44
TransactionId oldestCommitTsXid
Definition: pg_control.h:53
pg_time_t time
Definition: pg_control.h:52
int wal_level
Definition: pg_control.h:43
XLogRecPtr redo
Definition: pg_control.h:37
Oid oldestXidDB
Definition: pg_control.h:49
CheckPoint checkPointCopy
Definition: pg_control.h:135
pg_time_t time
Definition: pg_control.h:132
XLogRecPtr checkPoint
Definition: pg_control.h:133
FullTransactionId nextXid
Definition: transam.h:220
XLogRecPtr xl_prev
Definition: xlogrecord.h:45
uint8 xl_info
Definition: xlogrecord.h:46
uint32 xl_tot_len
Definition: xlogrecord.h:43
TransactionId xl_xid
Definition: xlogrecord.h:44
RmgrId xl_rmid
Definition: xlogrecord.h:47
void BootStrapSUBTRANS(void)
Definition: subtrans.c:269
#define InvalidTransactionId
Definition: transam.h:31
#define FirstGenbkiObjectId
Definition: transam.h:195
#define FirstNormalTransactionId
Definition: transam.h:34
static FullTransactionId FullTransactionIdFromEpochAndXid(uint32 epoch, TransactionId xid)
Definition: transam.h:71
void SetTransactionIdLimit(TransactionId oldest_datfrozenxid, Oid oldest_datoid)
Definition: varsup.c:372
void AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
Definition: varsup.c:355
TransamVariablesData * TransamVariables
Definition: varsup.c:34
int gettimeofday(struct timeval *tp, void *tzp)
int XLogFileInit(XLogSegNo logsegno, TimeLineID logtli)
Definition: xlog.c:3399
bool fullPageWrites
Definition: xlog.c:124
static void InitControlFile(uint64 sysidentifier, uint32 data_checksum_version)
Definition: xlog.c:4223
void SetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9547
int wal_level
Definition: xlog.c:133
static void WriteControlFile(void)
Definition: xlog.c:4258
#define BootstrapTimeLineID
Definition: xlog.c:113
static void ReadControlFile(void)
Definition: xlog.c:4368
#define SizeOfXLogLongPHD
Definition: xlog_internal.h:69
#define SizeOfXLogRecordDataHeaderShort
Definition: xlogrecord.h:217
#define XLR_BLOCK_ID_DATA_SHORT
Definition: xlogrecord.h:241
#define SizeOfXLogRecord
Definition: xlogrecord.h:55

References AdvanceOldestClogXid(), Assert(), BootStrapCLOG(), BootStrapCommitTs(), BootStrapMultiXact(), BootStrapSUBTRANS(), BootstrapTimeLineID, ControlFileData::checkPoint, ControlFileData::checkPointCopy, close, COMP_CRC32C, ControlFile, crc, ereport, errcode_for_file_access(), errmsg(), FIN_CRC32C, FirstGenbkiObjectId, FirstMultiXactId, FirstNormalTransactionId, fullPageWrites, CheckPoint::fullPageWrites, FullTransactionIdFromEpochAndXid(), gettimeofday(), INIT_CRC32C, InitControlFile(), InvalidTransactionId, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, CheckPoint::oldestXid, CheckPoint::oldestXidDB, openLogFile, openLogTLI, palloc(), PANIC, pfree(), pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), CheckPoint::PrevTimeLineID, ReadControlFile(), CheckPoint::redo, SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogRecordDataHeaderShort, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, TransamVariables, TYPEALIGN, wal_level, CheckPoint::wal_level, wal_segment_size, write, WriteControlFile(), XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XLogRecord::xl_tot_len, XLogRecord::xl_xid, XLOG_CHECKPOINT_SHUTDOWN, XLOG_PAGE_MAGIC, XLogFileInit(), XLogPageHeaderData::xlp_info, XLP_LONG_HEADER, XLogPageHeaderData::xlp_magic, XLogPageHeaderData::xlp_pageaddr, XLogLongPageHeaderData::xlp_seg_size, XLogLongPageHeaderData::xlp_sysid, XLogPageHeaderData::xlp_tli, XLogLongPageHeaderData::xlp_xlog_blcksz, and XLR_BLOCK_ID_DATA_SHORT.

Referenced by BootstrapModeMain().

◆ CalculateCheckpointSegments()

static void CalculateCheckpointSegments ( void  )
static

Definition at line 2174 of file xlog.c.

2175{
2176 double target;
2177
2178 /*-------
2179 * Calculate the distance at which to trigger a checkpoint, to avoid
2180 * exceeding max_wal_size_mb. This is based on two assumptions:
2181 *
2182 * a) we keep WAL for only one checkpoint cycle (prior to PG11 we kept
2183 * WAL for two checkpoint cycles to allow us to recover from the
2184 * secondary checkpoint if the first checkpoint failed, though we
2185 * only did this on the primary anyway, not on standby. Keeping just
2186 * one checkpoint simplifies processing and reduces disk space in
2187 * many smaller databases.)
2188 * b) during checkpoint, we consume checkpoint_completion_target *
2189 * number of segments consumed between checkpoints.
2190 *-------
2191 */
2192 target = (double) ConvertToXSegs(max_wal_size_mb, wal_segment_size) /
2193 (1.0 + CheckPointCompletionTarget);
2194
2195 /* round down */
2196 CheckPointSegments = (int) target;
2197
2198 if (CheckPointSegments < 1)
2199 CheckPointSegments = 1;
2200}
#define ConvertToXSegs(x, segsize)
Definition: xlog.c:604
int CheckPointSegments
Definition: xlog.c:158

References CheckPointCompletionTarget, CheckPointSegments, ConvertToXSegs, max_wal_size_mb, and wal_segment_size.

Referenced by assign_checkpoint_completion_target(), assign_max_wal_size(), and ReadControlFile().

◆ check_wal_buffers()

bool check_wal_buffers ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 4691 of file xlog.c.

4692{
4693 /*
4694 * -1 indicates a request for auto-tune.
4695 */
4696 if (*newval == -1)
4697 {
4698 /*
4699 * If we haven't yet changed the boot_val default of -1, just let it
4700 * be. We'll fix it when XLOGShmemSize is called.
4701 */
4702 if (XLOGbuffers == -1)
4703 return true;
4704
4705 /* Otherwise, substitute the auto-tune value */
4706 *newval = XLOGChooseNumBuffers();
4707 }
4708
4709 /*
4710 * We clamp manually-set values to at least 4 blocks. Prior to PostgreSQL
4711 * 9.1, a minimum of 4 was enforced by guc.c, but since that is no longer
4712 * the case, we just silently treat such values as a request for the
4713 * minimum. (We could throw an error instead, but that doesn't seem very
4714 * helpful.)
4715 */
4716 if (*newval < 4)
4717 *newval = 4;
4718
4719 return true;
4720}
static int XLOGChooseNumBuffers(void)
Definition: xlog.c:4675
int XLOGbuffers
Definition: xlog.c:119

References newval, XLOGbuffers, and XLOGChooseNumBuffers().

◆ check_wal_consistency_checking()

bool check_wal_consistency_checking ( char **  newval,
void **  extra,
GucSource  source 
)

Definition at line 4726 of file xlog.c.

4727{
4728 char *rawstring;
4729 List *elemlist;
4730 ListCell *l;
4731 bool newwalconsistency[RM_MAX_ID + 1];
4732
4733 /* Initialize the array */
4734 MemSet(newwalconsistency, 0, (RM_MAX_ID + 1) * sizeof(bool));
4735
4736 /* Need a modifiable copy of string */
4737 rawstring = pstrdup(*newval);
4738
4739 /* Parse string into list of identifiers */
4740 if (!SplitIdentifierString(rawstring, ',', &elemlist))
4741 {
4742 /* syntax error in list */
4743 GUC_check_errdetail("List syntax is invalid.");
4744 pfree(rawstring);
4745 list_free(elemlist);
4746 return false;
4747 }
4748
4749 foreach(l, elemlist)
4750 {
4751 char *tok = (char *) lfirst(l);
4752 int rmid;
4753
4754 /* Check for 'all'. */
4755 if (pg_strcasecmp(tok, "all") == 0)
4756 {
4757 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4758 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL)
4759 newwalconsistency[rmid] = true;
4760 }
4761 else
4762 {
4763 /* Check if the token matches any known resource manager. */
4764 bool found = false;
4765
4766 for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
4767 {
4768 if (RmgrIdExists(rmid) && GetRmgr(rmid).rm_mask != NULL &&
4769 pg_strcasecmp(tok, GetRmgr(rmid).rm_name) == 0)
4770 {
4771 newwalconsistency[rmid] = true;
4772 found = true;
4773 break;
4774 }
4775 }
4776 if (!found)
4777 {
4778 /*
4779 * During startup, it might be a not-yet-loaded custom
4780 * resource manager. Defer checking until
4781 * InitializeWalConsistencyChecking().
4782 */
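4783 if (!process_shared_preload_libraries_done)
4784 {
4785 check_wal_consistency_checking_deferred = true;
4786 }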
4787 else
4788 {
4789 GUC_check_errdetail("Unrecognized key word: \"%s\".", tok);
4790 pfree(rawstring);
4791 list_free(elemlist);
4792 return false;
4793 }
4794 }
4795 }
4796 }
4797
4798 pfree(rawstring);
4799 list_free(elemlist);
4800
4801 /* assign new value */
4802 *extra = guc_malloc(LOG, (RM_MAX_ID + 1) * sizeof(bool));
4803 if (!*extra)
4804 return false;
4805 memcpy(*extra, newwalconsistency, (RM_MAX_ID + 1) * sizeof(bool));
4806 return true;
4807}
#define LOG
Definition: elog.h:31
void * guc_malloc(int elevel, size_t size)
Definition: guc.c:636
#define GUC_check_errdetail
Definition: guc.h:505
void list_free(List *list)
Definition: list.c:1546
char * pstrdup(const char *in)
Definition: mcxt.c:1759
bool process_shared_preload_libraries_done
Definition: miscinit.c:1787
#define lfirst(lc)
Definition: pg_list.h:172
int pg_strcasecmp(const char *s1, const char *s2)
Definition: pgstrcasecmp.c:36
#define RM_MAX_ID
Definition: rmgr.h:33
Definition: pg_list.h:54
void(* rm_mask)(char *pagedata, BlockNumber blkno)
bool SplitIdentifierString(char *rawstring, char separator, List **namelist)
Definition: varlena.c:2744
static bool check_wal_consistency_checking_deferred
Definition: xlog.c:168
static RmgrData GetRmgr(RmgrId rmid)
static bool RmgrIdExists(RmgrId rmid)

References check_wal_consistency_checking_deferred, GetRmgr(), GUC_check_errdetail, guc_malloc(), lfirst, list_free(), LOG, MemSet, newval, pfree(), pg_strcasecmp(), process_shared_preload_libraries_done, pstrdup(), RmgrData::rm_mask, RM_MAX_ID, RmgrIdExists(), and SplitIdentifierString().

◆ check_wal_segment_size()

bool check_wal_segment_size ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 2217 of file xlog.c.

2218{
2219 if (!IsValidWalSegSize(*newval))
2220 {
2221 GUC_check_errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.");
2222 return false;
2223 }
2224
2225 return true;
2226}
#define IsValidWalSegSize(size)
Definition: xlog_internal.h:96

References GUC_check_errdetail, IsValidWalSegSize, and newval.

◆ CheckPointGuts()

static void CheckPointGuts ( XLogRecPtr  checkPointRedo,
int  flags 
)
static

Definition at line 7579 of file xlog.c.

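7580{
7581 CheckPointRelationMap();
7582 CheckPointReplicationSlots(flags & CHECKPOINT_IS_SHUTDOWN);
7583 CheckPointSnapBuild();
7584 CheckPointLogicalRewriteHeap();
7585 CheckPointReplicationOrigin();
7586
7587 /* Write out all dirty data in SLRUs and the main buffer pool */
7588 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_START(flags);
7589 CheckpointStats.ckpt_write_t = GetCurrentTimestamp();
7590 CheckPointCLOG();
7591 CheckPointCommitTs();
7592 CheckPointSUBTRANS();
7593 CheckPointMultiXact();
7594 CheckPointPredicate();
7595 CheckPointBuffers(flags);
7596
7597 /* Perform all queued up fsyncs */
7598 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_SYNC_START();
7599 CheckpointStats.ckpt_sync_t = GetCurrentTimestamp();
7600 ProcessSyncRequests();
7601 CheckpointStats.ckpt_sync_end_t = GetCurrentTimestamp();
7602 TRACE_POSTGRESQL_BUFFER_CHECKPOINT_DONE();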
7603
7604 /* We deliberately delay 2PC checkpointing as long as possible */
7605 CheckPointTwoPhase(checkPointRedo);
7606}
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
void CheckPointBuffers(int flags)
Definition: bufmgr.c:4209
void CheckPointCLOG(void)
Definition: clog.c:903
void CheckPointCommitTs(void)
Definition: commit_ts.c:794
void CheckPointMultiXact(void)
Definition: multixact.c:2234
void CheckPointReplicationOrigin(void)
Definition: origin.c:596
void CheckPointPredicate(void)
Definition: predicate.c:1041
void CheckPointRelationMap(void)
Definition: relmapper.c:611
void CheckPointLogicalRewriteHeap(void)
Definition: rewriteheap.c:1154
void CheckPointReplicationSlots(bool is_shutdown)
Definition: slot.c:2125
void CheckPointSnapBuild(void)
Definition: snapbuild.c:1970
TimestampTz ckpt_write_t
Definition: xlog.h:162
TimestampTz ckpt_sync_end_t
Definition: xlog.h:164
TimestampTz ckpt_sync_t
Definition: xlog.h:163
void CheckPointSUBTRANS(void)
Definition: subtrans.c:329
void ProcessSyncRequests(void)
Definition: sync.c:286
void CheckPointTwoPhase(XLogRecPtr redo_horizon)
Definition: twophase.c:1822
CheckpointStatsData CheckpointStats
Definition: xlog.c:211
#define CHECKPOINT_IS_SHUTDOWN
Definition: xlog.h:139

References CHECKPOINT_IS_SHUTDOWN, CheckPointBuffers(), CheckPointCLOG(), CheckPointCommitTs(), CheckPointLogicalRewriteHeap(), CheckPointMultiXact(), CheckPointPredicate(), CheckPointRelationMap(), CheckPointReplicationOrigin(), CheckPointReplicationSlots(), CheckPointSnapBuild(), CheckpointStats, CheckPointSUBTRANS(), CheckPointTwoPhase(), CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, GetCurrentTimestamp(), and ProcessSyncRequests().

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ CheckRequiredParameterValues()

static void CheckRequiredParameterValues ( void  )
static

Definition at line 5439 of file xlog.c.

5440{
5441 /*
5442 * For archive recovery, the WAL must be generated with at least 'replica'
5443 * wal_level.
5444 */
5445 if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
5446 {
5447 ereport(FATAL,
5448 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5449 errmsg("WAL was generated with \"wal_level=minimal\", cannot continue recovering"),
5450 errdetail("This happens if you temporarily set \"wal_level=minimal\" on the server."),
5451 errhint("Use a backup taken after setting \"wal_level\" to higher than \"minimal\".")));
5452 }
5453
5454 /*
5455 * For Hot Standby, the WAL must be generated with 'replica' mode, and we
5456 * must have at least as many backend slots as the primary.
5457 */
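5458 if (ArchiveRecoveryRequested && EnableHotStandby)
5459 {
5460 /* We ignore autovacuum_worker_slots when we make this test. */
5461 RecoveryRequiresIntParameter("max_connections",
5462 MaxConnections,
5463 ControlFile->MaxConnections);
5464 RecoveryRequiresIntParameter("max_worker_processes",
5465 max_worker_processes,
5466 ControlFile->max_worker_processes);
5467 RecoveryRequiresIntParameter("max_wal_senders",
5468 max_wal_senders,
5469 ControlFile->max_wal_senders);
5470 RecoveryRequiresIntParameter("max_prepared_transactions",
5471 max_prepared_xacts,
5472 ControlFile->max_prepared_xacts);
5473 RecoveryRequiresIntParameter("max_locks_per_transaction",
5474 max_locks_per_xact,
5475 ControlFile->max_locks_per_xact);
5476 }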
5477}
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errhint(const char *fmt,...)
Definition: elog.c:1330
int errcode(int sqlerrcode)
Definition: elog.c:863
#define FATAL
Definition: elog.h:41
int MaxConnections
Definition: globals.c:143
int max_worker_processes
Definition: globals.c:144
int max_locks_per_xact
Definition: lock.c:53
int max_worker_processes
Definition: pg_control.h:181
int max_locks_per_xact
Definition: pg_control.h:184
int max_prepared_xacts
Definition: pg_control.h:183
int max_prepared_xacts
Definition: twophase.c:116
int max_wal_senders
Definition: walsender.c:129
bool EnableHotStandby
Definition: xlog.c:123
@ WAL_LEVEL_MINIMAL
Definition: xlog.h:74
bool ArchiveRecoveryRequested
Definition: xlogrecovery.c:140
void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)

References ArchiveRecoveryRequested, ControlFile, EnableHotStandby, ereport, errcode(), errdetail(), errhint(), errmsg(), FATAL, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, RecoveryRequiresIntParameter(), ControlFileData::wal_level, and WAL_LEVEL_MINIMAL.

Referenced by StartupXLOG(), and xlog_redo().

◆ CheckXLogRemoved()

void CheckXLogRemoved ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3746 of file xlog.c.

3747{
3748 int save_errno = errno;
3749 XLogSegNo lastRemovedSegNo;
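3750
3751 SpinLockAcquire(&XLogCtl->info_lck);
3752 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3753 SpinLockRelease(&XLogCtl->info_lck);
3754
3755 if (segno <= lastRemovedSegNo)
3756 {
3757 char filename[MAXFNAMELEN];
3758
3759 XLogFileName(filename, tli, segno, wal_segment_size);
3760 errno = save_errno;
3761 ereport(ERROR,
3762 (errcode_for_file_access(),
3763 errmsg("requested WAL segment %s has already been removed",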
3764 filename)));
3765 }
3766 errno = save_errno;
3767}
#define ERROR
Definition: elog.h:39
static char * filename
Definition: pg_dumpall.c:120
XLogSegNo lastRemovedSegNo
Definition: xlog.c:462
uint64 XLogSegNo
Definition: xlogdefs.h:52

References ereport, errcode_for_file_access(), errmsg(), ERROR, filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, MAXFNAMELEN, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFileName().

Referenced by logical_read_xlog_page(), perform_base_backup(), and XLogSendPhysical().

◆ CleanupAfterArchiveRecovery()

static void CleanupAfterArchiveRecovery ( TimeLineID  EndOfLogTLI,
XLogRecPtr  EndOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5343 of file xlog.c.

5345{
5346 /*
5347 * Execute the recovery_end_command, if any.
5348 */
5349 if (recoveryEndCommand && strcmp(recoveryEndCommand, "") != 0)
5350 ExecuteRecoveryCommand(recoveryEndCommand,
5351 "recovery_end_command",
5352 true,
5353 WAIT_EVENT_RECOVERY_END_COMMAND);
5354
5355 /*
5356 * We switched to a new timeline. Clean up segments on the old timeline.
5357 *
5358 * If there are any higher-numbered segments on the old timeline, remove
5359 * them. They might contain valid WAL, but they might also be
5360 * pre-allocated files containing garbage. In any case, they are not part
5361 * of the new timeline's history so we don't need them.
5362 */
5363 RemoveNonParentXlogFiles(EndOfLog, newTLI);
5364
5365 /*
5366 * If the switch happened in the middle of a segment, what to do with the
5367 * last, partial segment on the old timeline? If we don't archive it, and
5368 * the server that created the WAL never archives it either (e.g. because
5369 * it was hit by a meteor), it will never make it to the archive. That's
5370 * OK from our point of view, because the new segment that we created with
5371 * the new TLI contains all the WAL from the old timeline up to the switch
5372 * point. But if you later try to do PITR to the "missing" WAL on the old
5373 * timeline, recovery won't find it in the archive. It's physically
5374 * present in the new file with new TLI, but recovery won't look there
5375 * when it's recovering to the older timeline. On the other hand, if we
5376 * archive the partial segment, and the original server on that timeline
5377 * is still running and archives the completed version of the same segment
5378 * later, it will fail. (We used to do that in 9.4 and below, and it
5379 * caused such problems).
5380 *
5381 * As a compromise, we rename the last segment with the .partial suffix,
5382 * and archive it. Archive recovery will never try to read .partial
5383 * segments, so they will normally go unused. But in the odd PITR case,
5384 * the administrator can copy them manually to the pg_wal directory
5385 * (removing the suffix). They can be useful in debugging, too.
5386 *
5387 * If a .done or .ready file already exists for the old timeline, however,
5388 * we had already determined that the segment is complete, so we can let
5389 * it be archived normally. (In particular, if it was restored from the
5390 * archive to begin with, it's expected to have a .done file).
5391 */
5392 if (XLogSegmentOffset(EndOfLog, wal_segment_size) != 0 &&
5393 XLogArchivingActive())
5394 {
5395 char origfname[MAXFNAMELEN];
5396 XLogSegNo endLogSegNo;
5397
5398 XLByteToPrevSeg(EndOfLog, endLogSegNo, wal_segment_size);
5399 XLogFileName(origfname, EndOfLogTLI, endLogSegNo, wal_segment_size);
5400
5401 if (!XLogArchiveIsReadyOrDone(origfname))
5402 {
5403 char origpath[MAXPGPATH];
5404 char partialfname[MAXFNAMELEN];
5405 char partialpath[MAXPGPATH];
5406
5407 /*
5408 * If we're summarizing WAL, we can't rename the partial file
5409 * until the summarizer finishes with it, else it will fail.
5410 */
5411 if (summarize_wal)
5412 WaitForWalSummarization(EndOfLog);
5413
5414 XLogFilePath(origpath, EndOfLogTLI, endLogSegNo, wal_segment_size);
5415 snprintf(partialfname, MAXFNAMELEN, "%s.partial", origfname);
5416 snprintf(partialpath, MAXPGPATH, "%s.partial", origpath);
5417
5418 /*
5419 * Make sure there's no .done or .ready file for the .partial
5420 * file.
5421 */
5422 XLogArchiveCleanup(partialfname);
5423
5424 durable_rename(origpath, partialpath, ERROR);
5425 XLogArchiveNotify(partialfname);
5426 }
5427 }
5428}
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:779
#define MAXPGPATH
#define snprintf
Definition: port.h:239
bool summarize_wal
void WaitForWalSummarization(XLogRecPtr lsn)
void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
Definition: xlog.c:3959
#define XLogArchivingActive()
Definition: xlog.h:99
#define XLByteToPrevSeg(xlrp, logSegNo, wal_segsz_bytes)
static void XLogFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, int wal_segsz_bytes)
bool XLogArchiveIsReadyOrDone(const char *xlog)
Definition: xlogarchive.c:664
void ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOnSignal, uint32 wait_event_info)
Definition: xlogarchive.c:295
void XLogArchiveNotify(const char *xlog)
Definition: xlogarchive.c:444
void XLogArchiveCleanup(const char *xlog)
Definition: xlogarchive.c:712
char * recoveryEndCommand
Definition: xlogrecovery.c:86

References durable_rename(), ERROR, ExecuteRecoveryCommand(), MAXFNAMELEN, MAXPGPATH, recoveryEndCommand, RemoveNonParentXlogFiles(), snprintf, summarize_wal, WaitForWalSummarization(), wal_segment_size, XLByteToPrevSeg, XLogArchiveCleanup(), XLogArchiveIsReadyOrDone(), XLogArchiveNotify(), XLogArchivingActive, XLogFileName(), XLogFilePath(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ CleanupBackupHistory()

static void CleanupBackupHistory ( void  )
static

Definition at line 4180 of file xlog.c.

4181{
4182 DIR *xldir;
4183 struct dirent *xlde;
4184 char path[MAXPGPATH + sizeof(XLOGDIR)];
4185
4186 xldir = AllocateDir(XLOGDIR);
4187
4188 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
4189 {
4190 if (IsBackupHistoryFileName(xlde->d_name))
4191 {
4192 if (XLogArchiveCheckDone(xlde->d_name))
4193 {
4194 elog(DEBUG2, "removing WAL backup history file \"%s\"",
4195 xlde->d_name);
4196 snprintf(path, sizeof(path), XLOGDIR "/%s", xlde->d_name);
4197 unlink(path);
4198 XLogArchiveCleanup(xlde->d_name);
4199 }
4200 }
4201 }
4202
4203 FreeDir(xldir);
4204}
#define DEBUG2
Definition: elog.h:29
int FreeDir(DIR *dir)
Definition: fd.c:3022
DIR * AllocateDir(const char *dirname)
Definition: fd.c:2904
struct dirent * ReadDir(DIR *dir, const char *dirname)
Definition: fd.c:2970
Definition: dirent.c:26
Definition: dirent.h:10
char d_name[MAX_PATH]
Definition: dirent.h:15
#define XLOGDIR
static bool IsBackupHistoryFileName(const char *fname)
bool XLogArchiveCheckDone(const char *xlog)
Definition: xlogarchive.c:565

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsBackupHistoryFileName(), MAXPGPATH, ReadDir(), snprintf, XLogArchiveCheckDone(), XLogArchiveCleanup(), and XLOGDIR.

Referenced by do_pg_backup_stop().

◆ CopyXLogRecordToWAL()

static void CopyXLogRecordToWAL ( int  write_len,
bool  isLogSwitch,
XLogRecData rdata,
XLogRecPtr  StartPos,
XLogRecPtr  EndPos,
TimeLineID  tli 
)
static

Definition at line 1231 of file xlog.c.

1233{
1234 char *currpos;
1235 int freespace;
1236 int written;
1237 XLogRecPtr CurrPos;
1238 XLogPageHeader pagehdr;
1239
1240 /*
1241 * Get a pointer to the right place in the right WAL buffer to start
1242 * inserting to.
1243 */
1244 CurrPos = StartPos;
1245 currpos = GetXLogBuffer(CurrPos, tli);
1246 freespace = INSERT_FREESPACE(CurrPos);
1247
1248 /*
1249 * there should be enough space for at least the first field (xl_tot_len)
1250 * on this page.
1251 */
1252 Assert(freespace >= sizeof(uint32));
1253
1254 /* Copy record data */
1255 written = 0;
1256 while (rdata != NULL)
1257 {
1258 const char *rdata_data = rdata->data;
1259 int rdata_len = rdata->len;
1260
1261 while (rdata_len > freespace)
1262 {
1263 /*
1264 * Write what fits on this page, and continue on the next page.
1265 */
1266 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || freespace == 0);
1267 memcpy(currpos, rdata_data, freespace);
1268 rdata_data += freespace;
1269 rdata_len -= freespace;
1270 written += freespace;
1271 CurrPos += freespace;
1272
1273 /*
1274 * Get pointer to beginning of next page, and set the xlp_rem_len
1275 * in the page header. Set XLP_FIRST_IS_CONTRECORD.
1276 *
1277 * It's safe to set the contrecord flag and xlp_rem_len without a
1278 * lock on the page. All the other flags were already set when the
1279 * page was initialized, in AdvanceXLInsertBuffer, and we're the
1280 * only backend that needs to set the contrecord flag.
1281 */
1282 currpos = GetXLogBuffer(CurrPos, tli);
1283 pagehdr = (XLogPageHeader) currpos;
1284 pagehdr->xlp_rem_len = write_len - written;
1285 pagehdr->xlp_info |= XLP_FIRST_IS_CONTRECORD;
1286
1287 /* skip over the page header */
1288 if (XLogSegmentOffset(CurrPos, wal_segment_size) == 0)
1289 {
1290 CurrPos += SizeOfXLogLongPHD;
1291 currpos += SizeOfXLogLongPHD;
1292 }
1293 else
1294 {
1295 CurrPos += SizeOfXLogShortPHD;
1296 currpos += SizeOfXLogShortPHD;
1297 }
1298 freespace = INSERT_FREESPACE(CurrPos);
1299 }
1300
1301 Assert(CurrPos % XLOG_BLCKSZ >= SizeOfXLogShortPHD || rdata_len == 0);
1302 memcpy(currpos, rdata_data, rdata_len);
1303 currpos += rdata_len;
1304 CurrPos += rdata_len;
1305 freespace -= rdata_len;
1306 written += rdata_len;
1307
1308 rdata = rdata->next;
1309 }
1310 Assert(written == write_len);
1311
1312 /*
1313 * If this was an xlog-switch, it's not enough to write the switch record,
1314 * we also have to consume all the remaining space in the WAL segment. We
1315 * have already reserved that space, but we need to actually fill it.
1316 */
1317 if (isLogSwitch && XLogSegmentOffset(CurrPos, wal_segment_size) != 0)
1318 {
1319 /* An xlog-switch record doesn't contain any data besides the header */
1320 Assert(write_len == SizeOfXLogRecord);
1321
1322 /* Assert that we did reserve the right amount of space */
1323 Assert(XLogSegmentOffset(EndPos, wal_segment_size) == 0);
1324
1325 /* Use up all the remaining space on the current page */
1326 CurrPos += freespace;
1327
1328 /*
1329 * Cause all remaining pages in the segment to be flushed, leaving the
1330 * XLog position where it should be, at the start of the next segment.
1331 * We do this one page at a time, to make sure we don't deadlock
1332 * against ourselves if wal_buffers < wal_segment_size.
1333 */
1334 while (CurrPos < EndPos)
1335 {
1336 /*
1337 * The minimal action to flush the page would be to call
1338 * WALInsertLockUpdateInsertingAt(CurrPos) followed by
1339 * AdvanceXLInsertBuffer(...). The page would be left initialized
1340 * mostly to zeros, except for the page header (always the short
1341 * variant, as this is never a segment's first page).
1342 *
1343 * The large vistas of zeros are good for compressibility, but the
1344 * headers interrupting them every XLOG_BLCKSZ (with values that
1345 * differ from page to page) are not. The effect varies with
1346 * compression tool, but bzip2 for instance compresses about an
1347 * order of magnitude worse if those headers are left in place.
1348 *
1349 * Rather than complicating AdvanceXLInsertBuffer itself (which is
1350 * called in heavily-loaded circumstances as well as this lightly-
1351 * loaded one) with variant behavior, we just use GetXLogBuffer
1352 * (which itself calls the two methods we need) to get the pointer
1353 * and zero most of the page. Then we just zero the page header.
1354 */
1355 currpos = GetXLogBuffer(CurrPos, tli);
1356 MemSet(currpos, 0, SizeOfXLogShortPHD);
1357
1358 CurrPos += XLOG_BLCKSZ;
1359 }
1360 }
1361 else
1362 {
1363 /* Align the end position, so that the next record starts aligned */
1364 CurrPos = MAXALIGN64(CurrPos);
1365 }
1366
1367 if (CurrPos != EndPos)
1368 ereport(PANIC,
1369 errcode(ERRCODE_DATA_CORRUPTED),
1370 errmsg_internal("space reserved for WAL record does not match what was written"));
1371}
uint32_t uint32
Definition: c.h:543
#define MAXALIGN64(LEN)
Definition: c.h:840
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1170
#define ERRCODE_DATA_CORRUPTED
Definition: pg_basebackup.c:42
const void * data
struct XLogRecData * next
#define INSERT_FREESPACE(endptr)
Definition: xlog.c:581
static char * GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
Definition: xlog.c:1638
#define XLP_FIRST_IS_CONTRECORD
Definition: xlog_internal.h:74
#define SizeOfXLogShortPHD
Definition: xlog_internal.h:52

References Assert(), XLogRecData::data, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errmsg_internal(), GetXLogBuffer(), INSERT_FREESPACE, XLogRecData::len, MAXALIGN64, MemSet, XLogRecData::next, PANIC, SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, wal_segment_size, XLogSegmentOffset, XLP_FIRST_IS_CONTRECORD, XLogPageHeaderData::xlp_info, and XLogPageHeaderData::xlp_rem_len.

Referenced by XLogInsertRecord().

◆ CreateCheckPoint()

bool CreateCheckPoint ( int  flags)

Definition at line 6961 of file xlog.c.

6962{
6963 bool shutdown;
6964 CheckPoint checkPoint;
6965 XLogRecPtr recptr;
6966 XLogSegNo _logSegNo;
6968 uint32 freespace;
6969 XLogRecPtr PriorRedoPtr;
6970 XLogRecPtr last_important_lsn;
6971 VirtualTransactionId *vxids;
6972 int nvxids;
6973 int oldXLogAllowed = 0;
6974
6975 /*
6976 * An end-of-recovery checkpoint is really a shutdown checkpoint, just
6977 * issued at a different time.
6978 */
6980 shutdown = true;
6981 else
6982 shutdown = false;
6983
6984 /* sanity check */
6985 if (RecoveryInProgress() && (flags & CHECKPOINT_END_OF_RECOVERY) == 0)
6986 elog(ERROR, "can't create a checkpoint during recovery");
6987
6988 /*
6989 * Prepare to accumulate statistics.
6990 *
6991 * Note: because it is possible for log_checkpoints to change while a
6992 * checkpoint proceeds, we always accumulate stats, even if
6993 * log_checkpoints is currently off.
6994 */
6997
6998 /*
6999 * Let smgr prepare for checkpoint; this has to happen outside the
7000 * critical section and before we determine the REDO pointer. Note that
7001 * smgr must not do anything that'd have to be undone if we decide no
7002 * checkpoint is needed.
7003 */
7005
7006 /*
7007 * Use a critical section to force system panic if we have trouble.
7008 */
7010
7011 if (shutdown)
7012 {
7013 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7016 LWLockRelease(ControlFileLock);
7017 }
7018
7019 /* Begin filling in the checkpoint WAL record */
7020 MemSet(&checkPoint, 0, sizeof(checkPoint));
7021 checkPoint.time = (pg_time_t) time(NULL);
7022
7023 /*
7024 * For Hot Standby, derive the oldestActiveXid before we fix the redo
7025 * pointer. This allows us to begin accumulating changes to assemble our
7026 * starting snapshot of locks and transactions.
7027 */
7028 if (!shutdown && XLogStandbyInfoActive())
7029 checkPoint.oldestActiveXid = GetOldestActiveTransactionId(false, true);
7030 else
7032
7033 /*
7034 * Get location of last important record before acquiring insert locks (as
7035 * GetLastImportantRecPtr() also locks WAL locks).
7036 */
7037 last_important_lsn = GetLastImportantRecPtr();
7038
7039 /*
7040 * If this isn't a shutdown or forced checkpoint, and if there has been no
7041 * WAL activity requiring a checkpoint, skip it. The idea here is to
7042 * avoid inserting duplicate checkpoints when the system is idle.
7043 */
7045 CHECKPOINT_FORCE)) == 0)
7046 {
7047 if (last_important_lsn == ControlFile->checkPoint)
7048 {
7051 (errmsg_internal("checkpoint skipped because system is idle")));
7052 return false;
7053 }
7054 }
7055
7056 /*
7057 * An end-of-recovery checkpoint is created before anyone is allowed to
7058 * write WAL. To allow us to write the checkpoint record, temporarily
7059 * enable XLogInsertAllowed.
7060 */
7061 if (flags & CHECKPOINT_END_OF_RECOVERY)
7062 oldXLogAllowed = LocalSetXLogInsertAllowed();
7063
7065 if (flags & CHECKPOINT_END_OF_RECOVERY)
7067 else
7068 checkPoint.PrevTimeLineID = checkPoint.ThisTimeLineID;
7069
7070 /*
7071 * We must block concurrent insertions while examining insert state.
7072 */
7074
7075 checkPoint.fullPageWrites = Insert->fullPageWrites;
7076 checkPoint.wal_level = wal_level;
7077
7078 if (shutdown)
7079 {
7080 XLogRecPtr curInsert = XLogBytePosToRecPtr(Insert->CurrBytePos);
7081
7082 /*
7083 * Compute new REDO record ptr = location of next XLOG record.
7084 *
7085 * Since this is a shutdown checkpoint, there can't be any concurrent
7086 * WAL insertion.
7087 */
7088 freespace = INSERT_FREESPACE(curInsert);
7089 if (freespace == 0)
7090 {
7091 if (XLogSegmentOffset(curInsert, wal_segment_size) == 0)
7092 curInsert += SizeOfXLogLongPHD;
7093 else
7094 curInsert += SizeOfXLogShortPHD;
7095 }
7096 checkPoint.redo = curInsert;
7097
7098 /*
7099 * Here we update the shared RedoRecPtr for future XLogInsert calls;
7100 * this must be done while holding all the insertion locks.
7101 *
7102 * Note: if we fail to complete the checkpoint, RedoRecPtr will be
7103 * left pointing past where it really needs to point. This is okay;
7104 * the only consequence is that XLogInsert might back up whole buffers
7105 * that it didn't really need to. We can't postpone advancing
7106 * RedoRecPtr because XLogInserts that happen while we are dumping
7107 * buffers must assume that their buffer changes are not included in
7108 * the checkpoint.
7109 */
7110 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
7111 }
7112
7113 /*
7114 * Now we can release the WAL insertion locks, allowing other xacts to
7115 * proceed while we are flushing disk buffers.
7116 */
7118
7119 /*
7120 * If this is an online checkpoint, we have not yet determined the redo
7121 * point. We do so now by inserting the special XLOG_CHECKPOINT_REDO
7122 * record; the LSN at which it starts becomes the new redo pointer. We
7123 * don't do this for a shutdown checkpoint, because in that case no WAL
7124 * can be written between the redo point and the insertion of the
7125 * checkpoint record itself, so the checkpoint record itself serves to
7126 * mark the redo point.
7127 */
7128 if (!shutdown)
7129 {
7130 /* Include WAL level in record for WAL summarizer's benefit. */
7133 (void) XLogInsert(RM_XLOG_ID, XLOG_CHECKPOINT_REDO);
7134
7135 /*
7136 * XLogInsertRecord will have updated XLogCtl->Insert.RedoRecPtr in
7137 * shared memory and RedoRecPtr in backend-local memory, but we need
7138 * to copy that into the record that will be inserted when the
7139 * checkpoint is complete.
7140 */
7141 checkPoint.redo = RedoRecPtr;
7142 }
7143
7144 /* Update the info_lck-protected copy of RedoRecPtr as well */
7146 XLogCtl->RedoRecPtr = checkPoint.redo;
7148
7149 /*
7150 * If enabled, log checkpoint start. We postpone this until now so as not
7151 * to log anything if we decided to skip the checkpoint.
7152 */
7153 if (log_checkpoints)
7154 LogCheckpointStart(flags, false);
7155
7156 /* Update the process title */
7157 update_checkpoint_display(flags, false, false);
7158
7159 TRACE_POSTGRESQL_CHECKPOINT_START(flags);
7160
7161 /*
7162 * Get the other info we need for the checkpoint record.
7163 *
7164 * We don't need to save oldestClogXid in the checkpoint, it only matters
7165 * for the short period in which clog is being truncated, and if we crash
7166 * during that we'll redo the clog truncation and fix up oldestClogXid
7167 * there.
7168 */
7169 LWLockAcquire(XidGenLock, LW_SHARED);
7170 checkPoint.nextXid = TransamVariables->nextXid;
7171 checkPoint.oldestXid = TransamVariables->oldestXid;
7173 LWLockRelease(XidGenLock);
7174
7175 LWLockAcquire(CommitTsLock, LW_SHARED);
7178 LWLockRelease(CommitTsLock);
7179
7180 LWLockAcquire(OidGenLock, LW_SHARED);
7181 checkPoint.nextOid = TransamVariables->nextOid;
7182 if (!shutdown)
7183 checkPoint.nextOid += TransamVariables->oidCount;
7184 LWLockRelease(OidGenLock);
7185
7186 MultiXactGetCheckptMulti(shutdown,
7187 &checkPoint.nextMulti,
7188 &checkPoint.nextMultiOffset,
7189 &checkPoint.oldestMulti,
7190 &checkPoint.oldestMultiDB);
7191
7192 /*
7193 * Having constructed the checkpoint record, ensure all shmem disk buffers
7194 * and commit-log buffers are flushed to disk.
7195 *
7196 * This I/O could fail for various reasons. If so, we will fail to
7197 * complete the checkpoint, but there is no reason to force a system
7198 * panic. Accordingly, exit critical section while doing it.
7199 */
7201
7202 /*
7203 * In some cases there are groups of actions that must all occur on one
7204 * side or the other of a checkpoint record. Before flushing the
7205 * checkpoint record we must explicitly wait for any backend currently
7206 * performing those groups of actions.
7207 *
7208 * One example is end of transaction, so we must wait for any transactions
7209 * that are currently in commit critical sections. If an xact inserted
7210 * its commit record into XLOG just before the REDO point, then a crash
7211 * restart from the REDO point would not replay that record, which means
7212 * that our flushing had better include the xact's update of pg_xact. So
7213 * we wait until it is out of its commit critical section before proceeding.
7214 * See notes in RecordTransactionCommit().
7215 *
7216 * Because we've already released the insertion locks, this test is a bit
7217 * fuzzy: it is possible that we will wait for xacts we didn't really need
7218 * to wait for. But the delay should be short and it seems better to make
7219 * checkpoint take a bit longer than to hold off insertions longer than
7220 * necessary. (In fact, the whole reason we have this issue is that xact.c
7221 * does commit record XLOG insertion and clog update as two separate steps
7222 * protected by different locks, but again that seems best on grounds of
7223 * minimizing lock contention.)
7224 *
7225 * A transaction that has not yet set delayChkptFlags when we look cannot
7226 * be at risk, since it has not inserted its commit record yet; and one
7227 * that's already cleared it is not at risk either, since it's done fixing
7228 * clog and we will correctly flush the update below. So we cannot miss
7229 * any xacts we need to wait for.
7230 */
7232 if (nvxids > 0)
7233 {
7234 do
7235 {
7236 /*
7237 * Keep absorbing fsync requests while we wait. There could even
7238 * be a deadlock if we don't, if the process that prevents the
7239 * checkpoint is trying to add a request to the queue.
7240 */
7242
7243 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_START);
7244 pg_usleep(10000L); /* wait for 10 msec */
7246 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7248 }
7249 pfree(vxids);
7250
7251 CheckPointGuts(checkPoint.redo, flags);
7252
7254 if (nvxids > 0)
7255 {
7256 do
7257 {
7259
7260 pgstat_report_wait_start(WAIT_EVENT_CHECKPOINT_DELAY_COMPLETE);
7261 pg_usleep(10000L); /* wait for 10 msec */
7263 } while (HaveVirtualXIDsDelayingChkpt(vxids, nvxids,
7265 }
7266 pfree(vxids);
7267
7268 /*
7269 * Take a snapshot of running transactions and write this to WAL. This
7270 * allows us to reconstruct the state of running transactions during
7271 * archive recovery, if required. Skip this if standby info is disabled.
7272 *
7273 * If we are shutting down, or the startup process is completing crash
7274 * recovery, we don't need to write running xact data.
7275 */
7276 if (!shutdown && XLogStandbyInfoActive())
7278
7280
7281 /*
7282 * Now insert the checkpoint record into XLOG.
7283 */
7285 XLogRegisterData(&checkPoint, sizeof(checkPoint));
7286 recptr = XLogInsert(RM_XLOG_ID,
7287 shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
7289
7290 XLogFlush(recptr);
7291
7292 /*
7293 * We mustn't write any new WAL after a shutdown checkpoint, or it will be
7294 * overwritten at next startup. No-one should even try, this just allows
7295 * sanity-checking. In the case of an end-of-recovery checkpoint, we want
7296 * to just temporarily disable writing until the system has exited
7297 * recovery.
7298 */
7299 if (shutdown)
7300 {
7301 if (flags & CHECKPOINT_END_OF_RECOVERY)
7302 LocalXLogInsertAllowed = oldXLogAllowed;
7303 else
7304 LocalXLogInsertAllowed = 0; /* never again write WAL */
7305 }
7306
7307 /*
7308 * We now have ProcLastRecPtr = start of actual checkpoint record, recptr
7309 * = end of actual checkpoint record.
7310 */
7311 if (shutdown && checkPoint.redo != ProcLastRecPtr)
7312 ereport(PANIC,
7313 (errmsg("concurrent write-ahead log activity while database system is shutting down")));
7314
7315 /*
7316 * Remember the prior checkpoint's redo ptr for
7317 * UpdateCheckPointDistanceEstimate()
7318 */
7319 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7320
7321 /*
7322 * Update the control file.
7323 */
7324 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7325 if (shutdown)
7328 ControlFile->checkPointCopy = checkPoint;
7329 /* crash recovery should always recover to the end of WAL */
7332
7333 /*
7334 * Persist unloggedLSN value. It's reset on crash recovery, so this goes
7335 * unused on non-shutdown checkpoints, but seems useful to store it always
7336 * for debugging purposes.
7337 */
7339
7341 LWLockRelease(ControlFileLock);
7342
7343 /*
7344 * We are now done with critical updates; no need for system panic if we
7345 * have trouble while fooling with old log segments.
7346 */
7348
7349 /*
7350 * WAL summaries end when the next XLOG_CHECKPOINT_REDO or
7351 * XLOG_CHECKPOINT_SHUTDOWN record is reached. This is the first point
7352 * where (a) we're not inside of a critical section and (b) we can be
7353 * certain that the relevant record has been flushed to disk, which must
7354 * happen before it can be summarized.
7355 *
7356 * If this is a shutdown checkpoint, then this happens reasonably
7357 * promptly: we've only just inserted and flushed the
7358 * XLOG_CHECKPOINT_SHUTDOWN record. If this is not a shutdown checkpoint,
7359 * then this might not be very prompt at all: the XLOG_CHECKPOINT_REDO
7360 * record was written before we began flushing data to disk, and that
7361 * could be many minutes ago at this point. However, we don't XLogFlush()
7362 * after inserting that record, so we're not guaranteed that it's on disk
7363 * until after the above call that flushes the XLOG_CHECKPOINT_ONLINE
7364 * record.
7365 */
7367
7368 /*
7369 * Let smgr do post-checkpoint cleanup (eg, deleting old files).
7370 */
7372
7373 /*
7374 * Update the average distance between checkpoints if the prior checkpoint
7375 * exists.
7376 */
7377 if (XLogRecPtrIsValid(PriorRedoPtr))
7379
7380 INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
7381
7382 /*
7383 * Delete old log files that are no longer needed for the last checkpoint,
7384 * to prevent the disk holding the xlog from filling up.
7385 */
7387 KeepLogSeg(recptr, &_logSegNo);
7389 _logSegNo, InvalidOid,
7391 {
7392 /*
7393 * Some slots have been invalidated; recalculate the old-segment
7394 * horizon, starting again from RedoRecPtr.
7395 */
7397 KeepLogSeg(recptr, &_logSegNo);
7398 }
7399 _logSegNo--;
7400 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, recptr,
7401 checkPoint.ThisTimeLineID);
7402
7403 /*
7404 * Make more log segments if needed. (Do this after recycling old log
7405 * segments, since that may supply some of the needed files.)
7406 */
7407 if (!shutdown)
7408 PreallocXlogFiles(recptr, checkPoint.ThisTimeLineID);
7409
7410 /*
7411 * Truncate pg_subtrans if possible. We can throw away all data before
7412 * the oldest XMIN of any running transaction. No future transaction will
7413 * attempt to reference any pg_subtrans entry older than that (see Asserts
7414 * in subtrans.c). During recovery, though, we mustn't do this because
7415 * StartupSUBTRANS hasn't been called yet.
7416 */
7417 if (!RecoveryInProgress())
7419
7420 /* Real work is done; log and update stats. */
7421 LogCheckpointEnd(false);
7422
7423 /* Reset the process title */
7424 update_checkpoint_display(flags, false, true);
7425
7426 TRACE_POSTGRESQL_CHECKPOINT_DONE(CheckpointStats.ckpt_bufs_written,
7427 NBuffers,
7431
7432 return true;
7433}
static uint64 pg_atomic_read_membarrier_u64(volatile pg_atomic_uint64 *ptr)
Definition: atomics.h:474
void AbsorbSyncRequests(void)
int NBuffers
Definition: globals.c:142
#define INJECTION_POINT(name, arg)
@ LW_SHARED
Definition: lwlock.h:113
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
void MultiXactGetCheckptMulti(bool is_shutdown, MultiXactId *nextMulti, MultiXactOffset *nextMultiOffset, MultiXactId *oldestMulti, Oid *oldestMultiDB)
Definition: multixact.c:2212
#define XLOG_CHECKPOINT_REDO
Definition: pg_control.h:82
@ DB_SHUTDOWNING
Definition: pg_control.h:94
@ DB_SHUTDOWNED
Definition: pg_control.h:92
#define XLOG_CHECKPOINT_ONLINE
Definition: pg_control.h:69
#define InvalidOid
Definition: postgres_ext.h:37
#define DELAY_CHKPT_START
Definition: proc.h:135
#define DELAY_CHKPT_COMPLETE
Definition: proc.h:136
TransactionId GetOldestTransactionIdConsideredRunning(void)
Definition: procarray.c:1982
bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids, int type)
Definition: procarray.c:3051
TransactionId GetOldestActiveTransactionId(bool inCommitOnly, bool allDbs)
Definition: procarray.c:2833
VirtualTransactionId * GetVirtualXIDsDelayingChkpt(int *nvxids, int type)
Definition: procarray.c:3005
void pg_usleep(long microsec)
Definition: signal.c:53
bool InvalidateObsoleteReplicationSlots(uint32 possible_causes, XLogSegNo oldestSegno, Oid dboid, TransactionId snapshotConflictHorizon)
Definition: slot.c:2065
@ RS_INVAL_WAL_REMOVED
Definition: slot.h:62
@ RS_INVAL_IDLE_TIMEOUT
Definition: slot.h:68
XLogRecPtr LogStandbySnapshot(void)
Definition: standby.c:1282
TimestampTz ckpt_start_t
Definition: xlog.h:161
int ckpt_segs_removed
Definition: xlog.h:171
int ckpt_segs_added
Definition: xlog.h:170
int ckpt_bufs_written
Definition: xlog.h:167
int ckpt_segs_recycled
Definition: xlog.h:172
XLogRecPtr minRecoveryPoint
Definition: pg_control.h:168
XLogRecPtr unloggedLSN
Definition: pg_control.h:137
TimeLineID minRecoveryPointTLI
Definition: pg_control.h:169
TransactionId oldestCommitTsXid
Definition: transam.h:232
TransactionId newestCommitTsXid
Definition: transam.h:233
TransactionId oldestXid
Definition: transam.h:222
TimeLineID InsertTimeLineID
Definition: xlog.c:510
XLogRecPtr RedoRecPtr
Definition: xlog.c:458
TimeLineID PrevTimeLineID
Definition: xlog.c:511
pg_atomic_uint64 unloggedLSN
Definition: xlog.c:465
XLogRecPtr RedoRecPtr
Definition: xlog.c:432
void TruncateSUBTRANS(TransactionId oldestXact)
Definition: subtrans.c:385
void SyncPreCheckpoint(void)
Definition: sync.c:177
void SyncPostCheckpoint(void)
Definition: sync.c:202
void WakeupWalSummarizer(void)
XLogRecPtr ProcLastRecPtr
Definition: xlog.c:255
bool RecoveryInProgress(void)
Definition: xlog.c:6406
static void WALInsertLockRelease(void)
Definition: xlog.c:1451
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos)
Definition: xlog.c:1864
static void WALInsertLockAcquireExclusive(void)
Definition: xlog.c:1422
static void UpdateControlFile(void)
Definition: xlog.c:4600
static void RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr, TimeLineID insertTLI)
Definition: xlog.c:3884
static void LogCheckpointStart(int flags, bool restartpoint)
Definition: xlog.c:6721
static XLogRecPtr RedoRecPtr
Definition: xlog.c:275
static void LogCheckpointEnd(bool restartpoint)
Definition: xlog.c:6753
static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli)
Definition: xlog.c:3709
bool log_checkpoints
Definition: xlog.c:131
static void KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
Definition: xlog.c:8021
static int LocalSetXLogInsertAllowed(void)
Definition: xlog.c:6494
XLogRecPtr GetLastImportantRecPtr(void)
Definition: xlog.c:6628
static void UpdateCheckPointDistanceEstimate(uint64 nbytes)
Definition: xlog.c:6858
static int LocalXLogInsertAllowed
Definition: xlog.c:238
void XLogFlush(XLogRecPtr record)
Definition: xlog.c:2783
static void CheckPointGuts(XLogRecPtr checkPointRedo, int flags)
Definition: xlog.c:7579
static void update_checkpoint_display(int flags, bool restartpoint, bool reset)
Definition: xlog.c:6896
#define CHECKPOINT_END_OF_RECOVERY
Definition: xlog.h:140
#define CHECKPOINT_FORCE
Definition: xlog.h:142
#define XLogStandbyInfoActive()
Definition: xlog.h:123
#define XLByteToSeg(xlrp, logSegNo, wal_segsz_bytes)
#define XLogRecPtrIsValid(r)
Definition: xlogdefs.h:29
XLogRecPtr XLogInsert(RmgrId rmid, uint8 info)
Definition: xloginsert.c:478
void XLogRegisterData(const void *data, uint32 len)
Definition: xloginsert.c:368
void XLogBeginInsert(void)
Definition: xloginsert.c:152

References AbsorbSyncRequests(), ControlFileData::checkPoint, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_start_t, ControlFile, DB_SHUTDOWNED, DB_SHUTDOWNING, DEBUG1, DELAY_CHKPT_COMPLETE, DELAY_CHKPT_START, elog, END_CRIT_SECTION, ereport, errmsg(), errmsg_internal(), ERROR, CheckPoint::fullPageWrites, GetCurrentTimestamp(), GetLastImportantRecPtr(), GetOldestActiveTransactionId(), GetOldestTransactionIdConsideredRunning(), GetVirtualXIDsDelayingChkpt(), HaveVirtualXIDsDelayingChkpt(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, Insert(), INSERT_FREESPACE, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, KeepLogSeg(), LocalSetXLogInsertAllowed(), LocalXLogInsertAllowed, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LogStandbySnapshot(), LW_EXCLUSIVE, LW_SHARED, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactGetCheckptMulti(), NBuffers, TransamVariablesData::newestCommitTsXid, CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, TransamVariablesData::oldestCommitTsXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, TransamVariablesData::oldestXid, CheckPoint::oldestXid, TransamVariablesData::oldestXidDB, CheckPoint::oldestXidDB, PANIC, pfree(), pg_atomic_read_membarrier_u64(), pg_usleep(), pgstat_report_wait_end(), pgstat_report_wait_start(), PreallocXlogFiles(), 
XLogCtlData::PrevTimeLineID, CheckPoint::PrevTimeLineID, ProcLastRecPtr, RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SizeOfXLogLongPHD, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, ControlFileData::state, SyncPostCheckpoint(), SyncPreCheckpoint(), CheckPoint::ThisTimeLineID, CheckPoint::time, TransamVariables, TruncateSUBTRANS(), XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), WakeupWalSummarizer(), wal_level, CheckPoint::wal_level, wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLogBeginInsert(), XLogBytePosToRecPtr(), XLogCtl, XLogFlush(), XLogInsert(), XLogRecPtrIsValid, XLogRegisterData(), XLogSegmentOffset, and XLogStandbyInfoActive.

Referenced by CheckpointerMain(), RequestCheckpoint(), and ShutdownXLOG().

◆ CreateEndOfRecoveryRecord()

static void CreateEndOfRecoveryRecord ( void  )
static

Definition at line 7444 of file xlog.c.

7445{
7446 xl_end_of_recovery xlrec;
7447 XLogRecPtr recptr;
7448
7449 /* sanity check */
7450 if (!RecoveryInProgress())
7451 elog(ERROR, "can only be used to end recovery");
7452
7453 xlrec.end_time = GetCurrentTimestamp();
7454 xlrec.wal_level = wal_level;
7455
7460
7462
7464 XLogRegisterData(&xlrec, sizeof(xl_end_of_recovery));
7465 recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
7466
7467 XLogFlush(recptr);
7468
7469 /*
7470 * Update the control file so that crash recovery can follow the timeline
7471 * changes to this point.
7472 */
7473 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7474 ControlFile->minRecoveryPoint = recptr;
7477 LWLockRelease(ControlFileLock);
7478
7480}
#define XLOG_END_OF_RECOVERY
Definition: pg_control.h:77
TimeLineID PrevTimeLineID
TimestampTz end_time
TimeLineID ThisTimeLineID

References ControlFile, elog, END_CRIT_SECTION, xl_end_of_recovery::end_time, ERROR, GetCurrentTimestamp(), XLogCtlData::InsertTimeLineID, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, XLogCtlData::PrevTimeLineID, xl_end_of_recovery::PrevTimeLineID, RecoveryInProgress(), START_CRIT_SECTION, xl_end_of_recovery::ThisTimeLineID, UpdateControlFile(), wal_level, xl_end_of_recovery::wal_level, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_END_OF_RECOVERY, XLogBeginInsert(), XLogCtl, XLogFlush(), XLogInsert(), and XLogRegisterData().

Referenced by PerformRecoveryXLogAction().

◆ CreateOverwriteContrecordRecord()

static XLogRecPtr CreateOverwriteContrecordRecord ( XLogRecPtr  aborted_lsn,
XLogRecPtr  pagePtr,
TimeLineID  newTLI 
)
static

Definition at line 7509 of file xlog.c.

7511{
7513 XLogRecPtr recptr;
7514 XLogPageHeader pagehdr;
7515 XLogRecPtr startPos;
7516
7517 /* sanity checks */
7518 if (!RecoveryInProgress())
7519 elog(ERROR, "can only be used at end of recovery");
7520 if (pagePtr % XLOG_BLCKSZ != 0)
7521 elog(ERROR, "invalid position for missing continuation record %X/%08X",
7522 LSN_FORMAT_ARGS(pagePtr));
7523
7524 /* The current WAL insert position should be right after the page header */
7525 startPos = pagePtr;
7526 if (XLogSegmentOffset(startPos, wal_segment_size) == 0)
7527 startPos += SizeOfXLogLongPHD;
7528 else
7529 startPos += SizeOfXLogShortPHD;
7530 recptr = GetXLogInsertRecPtr();
7531 if (recptr != startPos)
7532 elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
7533 LSN_FORMAT_ARGS(recptr));
7534
7536
7537 /*
7538 * Initialize the XLOG page header (by GetXLogBuffer), and set the
7539 * XLP_FIRST_IS_OVERWRITE_CONTRECORD flag.
7540 *
7541 * No other backend is allowed to write WAL yet, so acquiring the WAL
7542 * insertion lock is just pro forma.
7543 */
7545 pagehdr = (XLogPageHeader) GetXLogBuffer(pagePtr, newTLI);
7548
7549 /*
7550 * Insert the XLOG_OVERWRITE_CONTRECORD record as the first record on the
7551 * page. We know it becomes the first record, because no other backend is
7552 * allowed to write WAL yet.
7553 */
7555 xlrec.overwritten_lsn = aborted_lsn;
7558 recptr = XLogInsert(RM_XLOG_ID, XLOG_OVERWRITE_CONTRECORD);
7559
7560 /* check that the record was inserted to the right place */
7561 if (ProcLastRecPtr != startPos)
7562 elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
7564
7565 XLogFlush(recptr);
7566
7568
7569 return recptr;
7570}
#define XLOG_OVERWRITE_CONTRECORD
Definition: pg_control.h:81
static void WALInsertLockAcquire(void)
Definition: xlog.c:1377
XLogRecPtr GetXLogInsertRecPtr(void)
Definition: xlog.c:9499
#define XLP_FIRST_IS_OVERWRITE_CONTRECORD
Definition: xlog_internal.h:80

References elog, END_CRIT_SECTION, ERROR, GetCurrentTimestamp(), GetXLogBuffer(), GetXLogInsertRecPtr(), LSN_FORMAT_ARGS, xl_overwrite_contrecord::overwrite_time, xl_overwrite_contrecord::overwritten_lsn, ProcLastRecPtr, RecoveryInProgress(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, START_CRIT_SECTION, wal_segment_size, WALInsertLockAcquire(), WALInsertLockRelease(), XLOG_OVERWRITE_CONTRECORD, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogRegisterData(), XLogSegmentOffset, XLP_FIRST_IS_OVERWRITE_CONTRECORD, and XLogPageHeaderData::xlp_info.

Referenced by StartupXLOG().

◆ CreateRestartPoint()

bool CreateRestartPoint ( int  flags)

Definition at line 7659 of file xlog.c.

7660{
7661 XLogRecPtr lastCheckPointRecPtr;
7662 XLogRecPtr lastCheckPointEndPtr;
7663 CheckPoint lastCheckPoint;
7664 XLogRecPtr PriorRedoPtr;
7665 XLogRecPtr receivePtr;
7666 XLogRecPtr replayPtr;
7667 TimeLineID replayTLI;
7668 XLogRecPtr endptr;
7669 XLogSegNo _logSegNo;
7670 TimestampTz xtime;
7671
7672 /* Concurrent checkpoint/restartpoint cannot happen */
7674
7675 /* Get a local copy of the last safe checkpoint record. */
7677 lastCheckPointRecPtr = XLogCtl->lastCheckPointRecPtr;
7678 lastCheckPointEndPtr = XLogCtl->lastCheckPointEndPtr;
7679 lastCheckPoint = XLogCtl->lastCheckPoint;
7681
7682 /*
7683 * Check that we're still in recovery mode. It's ok if we exit recovery
7684 * mode after this check, the restart point is valid anyway.
7685 */
7686 if (!RecoveryInProgress())
7687 {
7689 (errmsg_internal("skipping restartpoint, recovery has already ended")));
7690 return false;
7691 }
7692
7693 /*
7694 * If the last checkpoint record we've replayed is already our last
7695 * restartpoint, we can't perform a new restart point. We still update
7696 * minRecoveryPoint in that case, so that if this is a shutdown restart
7697 * point, we won't start up earlier than before. That's not strictly
7698 * necessary, but when hot standby is enabled, it would be rather weird if
7699 * the database opened up for read-only connections at a point-in-time
7700 * before the last shutdown. Such time travel is still possible in case of
7701 * immediate shutdown, though.
7702 *
7703 * We don't explicitly advance minRecoveryPoint when we do create a
7704 * restartpoint. It's assumed that flushing the buffers will do that as a
7705 * side-effect.
7706 */
7707 if (!XLogRecPtrIsValid(lastCheckPointRecPtr) ||
7708 lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
7709 {
7711 errmsg_internal("skipping restartpoint, already performed at %X/%08X",
7712 LSN_FORMAT_ARGS(lastCheckPoint.redo)));
7713
7715 if (flags & CHECKPOINT_IS_SHUTDOWN)
7716 {
7717 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7720 LWLockRelease(ControlFileLock);
7721 }
7722 return false;
7723 }
7724
7725 /*
7726 * Update the shared RedoRecPtr so that the startup process can calculate
7727 * the number of segments replayed since last restartpoint, and request a
7728 * restartpoint if it exceeds CheckPointSegments.
7729 *
7730 * Like in CreateCheckPoint(), hold off insertions to update it, although
7731 * during recovery this is just pro forma, because no WAL insertions are
7732 * happening.
7733 */
7735 RedoRecPtr = XLogCtl->Insert.RedoRecPtr = lastCheckPoint.redo;
7737
7738 /* Also update the info_lck-protected copy */
7740 XLogCtl->RedoRecPtr = lastCheckPoint.redo;
7742
7743 /*
7744 * Prepare to accumulate statistics.
7745 *
7746 * Note: because it is possible for log_checkpoints to change while a
7747 * checkpoint proceeds, we always accumulate stats, even if
7748 * log_checkpoints is currently off.
7749 */
7752
7753 if (log_checkpoints)
7754 LogCheckpointStart(flags, true);
7755
7756 /* Update the process title */
7757 update_checkpoint_display(flags, true, false);
7758
7759 CheckPointGuts(lastCheckPoint.redo, flags);
7760
7761 /*
7762 * This location needs to be after CheckPointGuts() to ensure that some
7763 * work has already happened during this checkpoint.
7764 */
7765 INJECTION_POINT("create-restart-point", NULL);
7766
7767 /*
7768 * Remember the prior checkpoint's redo ptr for
7769 * UpdateCheckPointDistanceEstimate()
7770 */
7771 PriorRedoPtr = ControlFile->checkPointCopy.redo;
7772
7773 /*
7774 * Update pg_control, using current time. Check that it still shows an
7775 * older checkpoint, else do nothing; this is a quick hack to make sure
7776 * nothing really bad happens if somehow we get here after the
7777 * end-of-recovery checkpoint.
7778 */
7779 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
7780 if (ControlFile->checkPointCopy.redo < lastCheckPoint.redo)
7781 {
7782 /*
7783 * Update the checkpoint information. We do this even if the cluster
7784 * does not show DB_IN_ARCHIVE_RECOVERY to match with the set of WAL
7785 * segments recycled below.
7786 */
7787 ControlFile->checkPoint = lastCheckPointRecPtr;
7788 ControlFile->checkPointCopy = lastCheckPoint;
7789
7790 /*
7791 * Ensure minRecoveryPoint is past the checkpoint record and update it
7792 * if the control file still shows DB_IN_ARCHIVE_RECOVERY. Normally,
7793 * this will have happened already while writing out dirty buffers,
7794 * but not necessarily - e.g. because no buffers were dirtied. We do
7795 * this because a backup performed in recovery uses minRecoveryPoint
7796 * to determine which WAL files must be included in the backup, and
7797 * the file (or files) containing the checkpoint record must be
7798 * included, at a minimum. Note that for an ordinary restart of
7799 * recovery there's no value in having the minimum recovery point any
7800 * earlier than this anyway, because redo will begin just after the
7801 * checkpoint record.
7802 */
7804 {
7805 if (ControlFile->minRecoveryPoint < lastCheckPointEndPtr)
7806 {
7807 ControlFile->minRecoveryPoint = lastCheckPointEndPtr;
7809
7810 /* update local copy */
7813 }
7814 if (flags & CHECKPOINT_IS_SHUTDOWN)
7816 }
7818 }
7819 LWLockRelease(ControlFileLock);
7820
7821 /*
7822 * Update the average distance between checkpoints/restartpoints if the
7823 * prior checkpoint exists.
7824 */
7825 if (XLogRecPtrIsValid(PriorRedoPtr))
7827
7828 /*
7829 * Delete old log files that are no longer needed for the last restartpoint,
7830 * to prevent the disk holding the xlog from filling up.
7831 */
7833
7834 /*
7835 * Retreat _logSegNo using the current end of xlog replayed or received,
7836 * whichever is later.
7837 */
7838 receivePtr = GetWalRcvFlushRecPtr(NULL, NULL);
7839 replayPtr = GetXLogReplayRecPtr(&replayTLI);
7840 endptr = (receivePtr < replayPtr) ? replayPtr : receivePtr;
7841 KeepLogSeg(endptr, &_logSegNo);
7843 _logSegNo, InvalidOid,
7845 {
7846 /*
7847 * Some slots have been invalidated; recalculate the old-segment
7848 * horizon, starting again from RedoRecPtr.
7849 */
7851 KeepLogSeg(endptr, &_logSegNo);
7852 }
7853 _logSegNo--;
7854
7855 /*
7856 * Try to recycle segments on a useful timeline. If we've been promoted
7857 * since the beginning of this restartpoint, use the new timeline chosen
7858 * at end of recovery. If we're still in recovery, use the timeline we're
7859 * currently replaying.
7860 *
7861 * There is no guarantee that the WAL segments will be useful on the
7862 * current timeline; if recovery proceeds to a new timeline right after
7863 * this, the pre-allocated WAL segments on this timeline will not be used,
7864 * and will go wasted until recycled on the next restartpoint. We'll live
7865 * with that.
7866 */
7867 if (!RecoveryInProgress())
7868 replayTLI = XLogCtl->InsertTimeLineID;
7869
7870 RemoveOldXlogFiles(_logSegNo, RedoRecPtr, endptr, replayTLI);
7871
7872 /*
7873 * Make more log segments if needed. (Do this after recycling old log
7874 * segments, since that may supply some of the needed files.)
7875 */
7876 PreallocXlogFiles(endptr, replayTLI);
7877
7878 /*
7879 * Truncate pg_subtrans if possible. We can throw away all data before
7880 * the oldest XMIN of any running transaction. No future transaction will
7881 * attempt to reference any pg_subtrans entry older than that (see Asserts
7882 * in subtrans.c). When hot standby is disabled, though, we mustn't do
7883 * this because StartupSUBTRANS hasn't been called yet.
7884 */
7885 if (EnableHotStandby)
7887
7888 /* Real work is done; log and update stats. */
7889 LogCheckpointEnd(true);
7890
7891 /* Reset the process title */
7892 update_checkpoint_display(flags, true, true);
7893
7894 xtime = GetLatestXTime();
7896 errmsg("recovery restart point at %X/%08X",
7897 LSN_FORMAT_ARGS(lastCheckPoint.redo)),
7898 xtime ? errdetail("Last completed transaction was at log time %s.",
7899 timestamptz_to_str(xtime)) : 0);
7900
7901 /*
7902 * Finally, execute archive_cleanup_command, if any.
7903 */
7904 if (archiveCleanupCommand && strcmp(archiveCleanupCommand, "") != 0)
7906 "archive_cleanup_command",
7907 false,
7908 WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND);
7909
7910 return true;
7911}
const char * timestamptz_to_str(TimestampTz t)
Definition: timestamp.c:1862
int64 TimestampTz
Definition: timestamp.h:39
bool IsUnderPostmaster
Definition: globals.c:120
@ B_CHECKPOINTER
Definition: miscadmin.h:363
BackendType MyBackendType
Definition: miscinit.c:64
@ DB_IN_ARCHIVE_RECOVERY
Definition: pg_control.h:96
@ DB_SHUTDOWNED_IN_RECOVERY
Definition: pg_control.h:93
CheckPoint lastCheckPoint
Definition: xlog.c:546
XLogRecPtr lastCheckPointRecPtr
Definition: xlog.c:544
XLogRecPtr lastCheckPointEndPtr
Definition: xlog.c:545
XLogRecPtr GetWalRcvFlushRecPtr(XLogRecPtr *latestChunkStart, TimeLineID *receiveTLI)
static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
Definition: xlog.c:2703
static XLogRecPtr LocalMinRecoveryPoint
Definition: xlog.c:647
static TimeLineID LocalMinRecoveryPointTLI
Definition: xlog.c:648
uint32 TimeLineID
Definition: xlogdefs.h:63
char * archiveCleanupCommand
Definition: xlogrecovery.c:87
XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI)
TimestampTz GetLatestXTime(void)

References archiveCleanupCommand, Assert(), B_CHECKPOINTER, ControlFileData::checkPoint, CHECKPOINT_IS_SHUTDOWN, ControlFileData::checkPointCopy, CheckPointGuts(), CheckpointStats, CheckpointStatsData::ckpt_start_t, ControlFile, DB_IN_ARCHIVE_RECOVERY, DB_SHUTDOWNED_IN_RECOVERY, DEBUG2, EnableHotStandby, ereport, errdetail(), errmsg(), errmsg_internal(), ExecuteRecoveryCommand(), GetCurrentTimestamp(), GetLatestXTime(), GetOldestTransactionIdConsideredRunning(), GetWalRcvFlushRecPtr(), GetXLogReplayRecPtr(), XLogCtlData::info_lck, INJECTION_POINT, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, InvalidXLogRecPtr, IsUnderPostmaster, KeepLogSeg(), XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LOG, log_checkpoints, LogCheckpointEnd(), LogCheckpointStart(), LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MemSet, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyBackendType, PreallocXlogFiles(), RecoveryInProgress(), CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RemoveOldXlogFiles(), RS_INVAL_IDLE_TIMEOUT, RS_INVAL_WAL_REMOVED, SpinLockAcquire, SpinLockRelease, ControlFileData::state, CheckPoint::ThisTimeLineID, timestamptz_to_str(), TruncateSUBTRANS(), update_checkpoint_display(), UpdateCheckPointDistanceEstimate(), UpdateControlFile(), UpdateMinRecoveryPoint(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLByteToSeg, XLogCtl, and XLogRecPtrIsValid.

Referenced by CheckpointerMain(), and ShutdownXLOG().

◆ DataChecksumsEnabled()

◆ do_pg_abort_backup()

void do_pg_abort_backup ( int  code,
Datum  arg 
)

Definition at line 9458 of file xlog.c.

9459{
9460 bool during_backup_start = DatumGetBool(arg);
9461
9462 /* If called during backup start, there shouldn't be one already running */
9463 Assert(!during_backup_start || sessionBackupState == SESSION_BACKUP_NONE);
9464
9465 if (during_backup_start || sessionBackupState != SESSION_BACKUP_NONE)
9466 {
9470
9473
9474 if (!during_backup_start)
9476 errmsg("aborting backup due to backend exiting before pg_backup_stop was called"));
9477 }
9478}
#define WARNING
Definition: elog.h:36
void * arg
static bool DatumGetBool(Datum X)
Definition: postgres.h:100
int runningBackups
Definition: xlog.c:440
static SessionBackupState sessionBackupState
Definition: xlog.c:393
@ SESSION_BACKUP_NONE
Definition: xlog.h:290

References arg, Assert(), DatumGetBool(), ereport, errmsg(), XLogCtlData::Insert, XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, and XLogCtl.

Referenced by do_pg_backup_start(), perform_base_backup(), and register_persistent_abort_backup_handler().

◆ do_pg_backup_start()

void do_pg_backup_start ( const char *  backupidstr,
bool  fast,
List **  tablespaces,
BackupState state,
StringInfo  tblspcmapfile 
)

Definition at line 8856 of file xlog.c.

8858{
8860
8861 Assert(state != NULL);
8863
8864 /*
8865 * During recovery, we don't need to check the WAL level, because if the
8866 * WAL level is not sufficient, it's impossible to get here during recovery.
8867 */
8869 ereport(ERROR,
8870 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8871 errmsg("WAL level not sufficient for making an online backup"),
8872 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
8873
8874 if (strlen(backupidstr) > MAXPGPATH)
8875 ereport(ERROR,
8876 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8877 errmsg("backup label too long (max %d bytes)",
8878 MAXPGPATH)));
8879
8880 strlcpy(state->name, backupidstr, sizeof(state->name));
8881
8882 /*
8883 * Mark backup active in shared memory. We must do full-page WAL writes
8884 * during an on-line backup even if not doing so at other times, because
8885 * it's quite possible for the backup dump to obtain a "torn" (partially
8886 * written) copy of a database page if it reads the page concurrently with
8887 * our write to the same page. This can be fixed as long as the first
8888 * write to the page in the WAL sequence is a full-page write. Hence, we
8889 * increment runningBackups then force a CHECKPOINT, to ensure there are
8890 * no dirty pages in shared memory that might get dumped while the backup
8891 * is in progress without having a corresponding WAL record. (Once the
8892 * backup is complete, we need not force full-page writes anymore, since
8893 * we expect that any pages not modified during the backup interval must
8894 * have been correctly captured by the backup.)
8895 *
8896 * Note that forcing full-page writes has no effect during an online
8897 * backup from the standby.
8898 *
8899 * We must hold all the insertion locks to change the value of
8900 * runningBackups, to ensure adequate interlocking against
8901 * XLogInsertRecord().
8902 */
8906
8907 /*
8908 * Ensure we decrement runningBackups if we fail below. NB -- for this to
8909 * work correctly, it is critical that sessionBackupState is only updated
8910 * after this block is over.
8911 */
8913 {
8914 bool gotUniqueStartpoint = false;
8915 DIR *tblspcdir;
8916 struct dirent *de;
8917 tablespaceinfo *ti;
8918 int datadirpathlen;
8919
8920 /*
8921 * Force an XLOG file switch before the checkpoint, to ensure that the
8922 * WAL segment the checkpoint is written to doesn't contain pages with
8923 * old timeline IDs. That would otherwise happen if you called
8924 * pg_backup_start() right after restoring from a PITR archive: the
8925 * first WAL segment containing the startup checkpoint has pages in
8926 * the beginning with the old timeline ID. That can cause trouble at
8927 * recovery: we won't have a history file covering the old timeline if
8928 * pg_wal directory was not included in the base backup and the WAL
8929 * archive was cleared too before starting the backup.
8930 *
8931 * This also ensures that we have emitted a WAL page header that has
8932 * XLP_BKP_REMOVABLE off before we emit the checkpoint record.
8933 * Therefore, if a WAL archiver (such as pglesslog) is trying to
8934 * compress out removable backup blocks, it won't remove any that
8935 * occur after this point.
8936 *
8937 * During recovery, we skip forcing XLOG file switch, which means that
8938 * the backup taken during recovery is not available for the special
8939 * recovery case described above.
8940 */
8942 RequestXLogSwitch(false);
8943
8944 do
8945 {
8946 bool checkpointfpw;
8947
8948 /*
8949 * Force a CHECKPOINT. Aside from being necessary to prevent torn
8950 * page problems, this guarantees that two successive backup runs
8951 * will have different checkpoint positions and hence different
8952 * history file names, even if nothing happened in between.
8953 *
8954 * During recovery, establish a restartpoint if possible. We use
8955 * the last restartpoint as the backup starting checkpoint. This
8956 * means that two successive backup runs can have same checkpoint
8957 * positions.
8958 *
8959 * Since the fact that we are executing do_pg_backup_start()
8960 * during recovery means that checkpointer is running, we can use
8961 * RequestCheckpoint() to establish a restartpoint.
8962 *
8963 * We use CHECKPOINT_FAST only if requested by user (via passing
8964 * fast = true). Otherwise this can take a while.
8965 */
8967 (fast ? CHECKPOINT_FAST : 0));
8968
8969 /*
8970 * Now we need to fetch the checkpoint record location, and also
8971 * its REDO pointer. The oldest point in WAL that would be needed
8972 * to restore starting from the checkpoint is precisely the REDO
8973 * pointer.
8974 */
8975 LWLockAcquire(ControlFileLock, LW_SHARED);
8976 state->checkpointloc = ControlFile->checkPoint;
8977 state->startpoint = ControlFile->checkPointCopy.redo;
8979 checkpointfpw = ControlFile->checkPointCopy.fullPageWrites;
8980 LWLockRelease(ControlFileLock);
8981
8983 {
8984 XLogRecPtr recptr;
8985
8986 /*
8987 * Check to see if all WAL replayed during online backup
8988 * (i.e., since last restartpoint used as backup starting
8989 * checkpoint) contain full-page writes.
8990 */
8992 recptr = XLogCtl->lastFpwDisableRecPtr;
8994
8995 if (!checkpointfpw || state->startpoint <= recptr)
8996 ereport(ERROR,
8997 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
8998 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
8999 "since last restartpoint"),
9000 errhint("This means that the backup being taken on the standby "
9001 "is corrupt and should not be used. "
9002 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9003 "and then try an online backup again.")));
9004
9005 /*
9006 * During recovery, since we don't use the end-of-backup WAL
9007 * record and don't write the backup history file, the
9008 * starting WAL location doesn't need to be unique. This means
9009 * that two base backups started at the same time might use
9010 * the same checkpoint as starting locations.
9011 */
9012 gotUniqueStartpoint = true;
9013 }
9014
9015 /*
9016 * If two base backups are started at the same time (in WAL sender
9017 * processes), we need to make sure that they use different
9018 * checkpoints as starting locations, because we use the starting
9019 * WAL location as a unique identifier for the base backup in the
9020 * end-of-backup WAL record and when we write the backup history
9021 * file. Perhaps it would be better to generate a separate unique ID
9022 * for each backup instead of forcing another checkpoint, but
9023 * taking a checkpoint right after another is not that expensive
9024 * either because only few buffers have been dirtied yet.
9025 */
9027 if (XLogCtl->Insert.lastBackupStart < state->startpoint)
9028 {
9029 XLogCtl->Insert.lastBackupStart = state->startpoint;
9030 gotUniqueStartpoint = true;
9031 }
9033 } while (!gotUniqueStartpoint);
9034
9035 /*
9036 * Construct tablespace_map file.
9037 */
9038 datadirpathlen = strlen(DataDir);
9039
9040 /* Collect information about all tablespaces */
9041 tblspcdir = AllocateDir(PG_TBLSPC_DIR);
9042 while ((de = ReadDir(tblspcdir, PG_TBLSPC_DIR)) != NULL)
9043 {
9044 char fullpath[MAXPGPATH + sizeof(PG_TBLSPC_DIR)];
9045 char linkpath[MAXPGPATH];
9046 char *relpath = NULL;
9047 char *s;
9048 PGFileType de_type;
9049 char *badp;
9050 Oid tsoid;
9051
9052 /*
9053 * Try to parse the directory name as an unsigned integer.
9054 *
9055 * Tablespace directories should be positive integers that can be
9056 * represented in 32 bits, with no leading zeroes or trailing
9057 * garbage. If we come across a name that doesn't meet those
9058 * criteria, skip it.
9059 */
9060 if (de->d_name[0] < '1' || de->d_name[0] > '9')
9061 continue;
9062 errno = 0;
9063 tsoid = strtoul(de->d_name, &badp, 10);
9064 if (*badp != '\0' || errno == EINVAL || errno == ERANGE)
9065 continue;
9066
9067 snprintf(fullpath, sizeof(fullpath), "%s/%s", PG_TBLSPC_DIR, de->d_name);
9068
9069 de_type = get_dirent_type(fullpath, de, false, ERROR);
9070
9071 if (de_type == PGFILETYPE_LNK)
9072 {
9073 StringInfoData escapedpath;
9074 int rllen;
9075
9076 rllen = readlink(fullpath, linkpath, sizeof(linkpath));
9077 if (rllen < 0)
9078 {
9080 (errmsg("could not read symbolic link \"%s\": %m",
9081 fullpath)));
9082 continue;
9083 }
9084 else if (rllen >= sizeof(linkpath))
9085 {
9087 (errmsg("symbolic link \"%s\" target is too long",
9088 fullpath)));
9089 continue;
9090 }
9091 linkpath[rllen] = '\0';
9092
9093 /*
9094 * Relpath holds the relative path of the tablespace directory
9095 * when it's located within PGDATA, or NULL if it's located
9096 * elsewhere.
9097 */
9098 if (rllen > datadirpathlen &&
9099 strncmp(linkpath, DataDir, datadirpathlen) == 0 &&
9100 IS_DIR_SEP(linkpath[datadirpathlen]))
9101 relpath = pstrdup(linkpath + datadirpathlen + 1);
9102
9103 /*
9104 * Add a backslash-escaped version of the link path to the
9105 * tablespace map file.
9106 */
9107 initStringInfo(&escapedpath);
9108 for (s = linkpath; *s; s++)
9109 {
9110 if (*s == '\n' || *s == '\r' || *s == '\\')
9111 appendStringInfoChar(&escapedpath, '\\');
9112 appendStringInfoChar(&escapedpath, *s);
9113 }
9114 appendStringInfo(tblspcmapfile, "%s %s\n",
9115 de->d_name, escapedpath.data);
9116 pfree(escapedpath.data);
9117 }
9118 else if (de_type == PGFILETYPE_DIR)
9119 {
9120 /*
9121 * It's possible to use allow_in_place_tablespaces to create
9122 * directories directly under pg_tblspc, for testing purposes
9123 * only.
9124 *
9125 * In this case, we store a relative path rather than an
9126 * absolute path into the tablespaceinfo.
9127 */
9128 snprintf(linkpath, sizeof(linkpath), "%s/%s",
9129 PG_TBLSPC_DIR, de->d_name);
9130 relpath = pstrdup(linkpath);
9131 }
9132 else
9133 {
9134 /* Skip any other file type that appears here. */
9135 continue;
9136 }
9137
9138 ti = palloc(sizeof(tablespaceinfo));
9139 ti->oid = tsoid;
9140 ti->path = pstrdup(linkpath);
9141 ti->rpath = relpath;
9142 ti->size = -1;
9143
9144 if (tablespaces)
9145 *tablespaces = lappend(*tablespaces, ti);
9146 }
9147 FreeDir(tblspcdir);
9148
9149 state->starttime = (pg_time_t) time(NULL);
9150 }
9152
9153 state->started_in_recovery = backup_started_in_recovery;
9154
9155 /*
9156 * Mark that the start phase has correctly finished for the backup.
9157 */
9159}
static bool backup_started_in_recovery
Definition: basebackup.c:123
void RequestCheckpoint(int flags)
PGFileType get_dirent_type(const char *path, const struct dirent *de, bool look_through_symlinks, int elevel)
Definition: file_utils.c:547
PGFileType
Definition: file_utils.h:19
@ PGFILETYPE_LNK
Definition: file_utils.h:24
@ PGFILETYPE_DIR
Definition: file_utils.h:23
char * DataDir
Definition: globals.c:71
#define PG_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:47
#define PG_END_ENSURE_ERROR_CLEANUP(cleanup_function, arg)
Definition: ipc.h:52
List * lappend(List *list, void *datum)
Definition: list.c:339
#define IS_DIR_SEP(ch)
Definition: port.h:103
size_t strlcpy(char *dst, const char *src, size_t siz)
Definition: strlcpy.c:45
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
unsigned int Oid
Definition: postgres_ext.h:32
#define relpath(rlocator, forknum)
Definition: relpath.h:150
#define PG_TBLSPC_DIR
Definition: relpath.h:41
void appendStringInfo(StringInfo str, const char *fmt,...)
Definition: stringinfo.c:145
void appendStringInfoChar(StringInfo str, char ch)
Definition: stringinfo.c:242
void initStringInfo(StringInfo str)
Definition: stringinfo.c:97
XLogRecPtr lastFpwDisableRecPtr
Definition: xlog.c:552
XLogRecPtr lastBackupStart
Definition: xlog.c:441
Definition: regguts.h:323
char * rpath
Definition: basebackup.h:32
#define readlink(path, buf, size)
Definition: win32_port.h:226
XLogRecPtr RequestXLogSwitch(bool mark_unimportant)
Definition: xlog.c:8130
void do_pg_abort_backup(int code, Datum arg)
Definition: xlog.c:9458
@ SESSION_BACKUP_RUNNING
Definition: xlog.h:291
#define CHECKPOINT_WAIT
Definition: xlog.h:145
#define CHECKPOINT_FAST
Definition: xlog.h:141
#define XLogIsNeeded()
Definition: xlog.h:109

References AllocateDir(), appendStringInfo(), appendStringInfoChar(), Assert(), backup_started_in_recovery, BoolGetDatum(), ControlFileData::checkPoint, CHECKPOINT_FAST, CHECKPOINT_FORCE, CHECKPOINT_WAIT, ControlFileData::checkPointCopy, ControlFile, dirent::d_name, StringInfoData::data, DataDir, do_pg_abort_backup(), ereport, errcode(), errhint(), errmsg(), ERROR, FreeDir(), CheckPoint::fullPageWrites, get_dirent_type(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, IS_DIR_SEP, lappend(), XLogCtlInsert::lastBackupStart, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXPGPATH, tablespaceinfo::oid, palloc(), tablespaceinfo::path, pfree(), PG_END_ENSURE_ERROR_CLEANUP, PG_ENSURE_ERROR_CLEANUP, PG_TBLSPC_DIR, PGFILETYPE_DIR, PGFILETYPE_LNK, pstrdup(), ReadDir(), readlink, RecoveryInProgress(), CheckPoint::redo, relpath, RequestCheckpoint(), RequestXLogSwitch(), tablespaceinfo::rpath, XLogCtlInsert::runningBackups, SESSION_BACKUP_RUNNING, sessionBackupState, tablespaceinfo::size, snprintf, SpinLockAcquire, SpinLockRelease, strlcpy(), CheckPoint::ThisTimeLineID, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, XLogCtl, and XLogIsNeeded.

Referenced by perform_base_backup(), and pg_backup_start().

◆ do_pg_backup_stop()

void do_pg_backup_stop ( BackupState state,
bool  waitforarchive 
)

Definition at line 9184 of file xlog.c.

9185{
9186 bool backup_stopped_in_recovery = false;
9187 char histfilepath[MAXPGPATH];
9188 char lastxlogfilename[MAXFNAMELEN];
9189 char histfilename[MAXFNAMELEN];
9190 XLogSegNo _logSegNo;
9191 FILE *fp;
9192 int seconds_before_warning;
9193 int waits = 0;
9194 bool reported_waiting = false;
9195
9196 Assert(state != NULL);
9197
9198 backup_stopped_in_recovery = RecoveryInProgress();
9199
9200 /*
9201 * During recovery, we don't need to check the WAL level, because if the
9202 * WAL level is not sufficient, it's impossible to get here during recovery.
9203 */
9204 if (!backup_stopped_in_recovery && !XLogIsNeeded())
9205 ereport(ERROR,
9206 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9207 errmsg("WAL level not sufficient for making an online backup"),
9208 errhint("\"wal_level\" must be set to \"replica\" or \"logical\" at server start.")));
9209
9210 /*
9211 * OK to update backup counter and session-level lock.
9212 *
9213 * Note that CHECK_FOR_INTERRUPTS() must not occur while updating them,
9214 * otherwise they can be updated inconsistently, which might cause
9215 * do_pg_abort_backup() to fail.
9216 */
9218
9219 /*
9220 * It is expected that each do_pg_backup_start() call is matched by
9221 * exactly one do_pg_backup_stop() call.
9222 */
9225
9226 /*
9227 * Clean up session-level lock.
9228 *
9229 * You might think that WALInsertLockRelease() can be called before
9230 * cleaning up session-level lock because session-level lock doesn't need
9231 * to be protected with WAL insertion lock. But since
9232 * CHECK_FOR_INTERRUPTS() can occur in it, session-level lock must be
9233 * cleaned up before it.
9234 */
9236
9238
9239 /*
9240 * If we are taking an online backup from the standby, we confirm that the
9241 * standby has not been promoted during the backup.
9242 */
9243 if (state->started_in_recovery && !backup_stopped_in_recovery)
9244 ereport(ERROR,
9245 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9246 errmsg("the standby was promoted during online backup"),
9247 errhint("This means that the backup being taken is corrupt "
9248 "and should not be used. "
9249 "Try taking another online backup.")));
9250
9251 /*
9252 * During recovery, we don't write an end-of-backup record. We assume that
9253 * pg_control was backed up last and its minimum recovery point can be
9254 * available as the backup end location. Since we don't have an
9255 * end-of-backup record, we use the pg_control value to check whether
9256 * we've reached the end of backup when starting recovery from this
9257 * backup. We have no way of checking if pg_control wasn't backed up last
9258 * however.
9259 *
9260 * We don't force a switch to new WAL file but it is still possible to
9261 * wait for all the required files to be archived if waitforarchive is
9262 * true. This is okay if we use the backup to start a standby and fetch
9263 * the missing WAL using streaming replication. But in the case of an
9264 * archive recovery, a user should set waitforarchive to true and wait for
9265 * them to be archived to ensure that all the required files are
9266 * available.
9267 *
9268 * We return the current minimum recovery point as the backup end
9269 * location. Note that it can be greater than the exact backup end
9270 * location if the minimum recovery point is updated after the backup of
9271 * pg_control. This is harmless for current uses.
9272 *
9273 * XXX currently a backup history file is for informational and debug
9274 * purposes only. It's not essential for an online backup. Furthermore,
9275 * even if it's created, it will not be archived during recovery because
9276 * an archiver is not invoked. So it doesn't seem worthwhile to write a
9277 * backup history file during recovery.
9278 */
9279 if (backup_stopped_in_recovery)
9280 {
9281 XLogRecPtr recptr;
9282
9283 /*
9284 * Check to see if all WAL replayed during online backup contain
9285 * full-page writes.
9286 */
9288 recptr = XLogCtl->lastFpwDisableRecPtr;
9290
9291 if (state->startpoint <= recptr)
9292 ereport(ERROR,
9293 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
9294 errmsg("WAL generated with \"full_page_writes=off\" was replayed "
9295 "during online backup"),
9296 errhint("This means that the backup being taken on the standby "
9297 "is corrupt and should not be used. "
9298 "Enable \"full_page_writes\" and run CHECKPOINT on the primary, "
9299 "and then try an online backup again.")));
9300
9301
9302 LWLockAcquire(ControlFileLock, LW_SHARED);
9303 state->stoppoint = ControlFile->minRecoveryPoint;
9305 LWLockRelease(ControlFileLock);
9306 }
9307 else
9308 {
9309 char *history_file;
9310
9311 /*
9312 * Write the backup-end xlog record
9313 */
9315 XLogRegisterData(&state->startpoint,
9316 sizeof(state->startpoint));
9317 state->stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
9318
9319 /*
9320 * Given that we're not in recovery, InsertTimeLineID is set and can't
9321 * change, so we can read it without a lock.
9322 */
9323 state->stoptli = XLogCtl->InsertTimeLineID;
9324
9325 /*
9326 * Force a switch to a new xlog segment file, so that the backup is
9327 * valid as soon as archiver moves out the current segment file.
9328 */
9329 RequestXLogSwitch(false);
9330
9331 state->stoptime = (pg_time_t) time(NULL);
9332
9333 /*
9334 * Write the backup history file
9335 */
9336 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9337 BackupHistoryFilePath(histfilepath, state->stoptli, _logSegNo,
9338 state->startpoint, wal_segment_size);
9339 fp = AllocateFile(histfilepath, "w");
9340 if (!fp)
9341 ereport(ERROR,
9343 errmsg("could not create file \"%s\": %m",
9344 histfilepath)));
9345
9346 /* Build and save the contents of the backup history file */
9347 history_file = build_backup_content(state, true);
9348 fprintf(fp, "%s", history_file);
9349 pfree(history_file);
9350
9351 if (fflush(fp) || ferror(fp) || FreeFile(fp))
9352 ereport(ERROR,
9354 errmsg("could not write file \"%s\": %m",
9355 histfilepath)));
9356
9357 /*
9358 * Clean out any no-longer-needed history files. As a side effect,
9359 * this will post a .ready file for the newly created history file,
9360 * notifying the archiver that history file may be archived
9361 * immediately.
9362 */
9364 }
9365
9366 /*
9367 * If archiving is enabled, wait for all the required WAL files to be
9368 * archived before returning. If archiving isn't enabled, the required WAL
9369 * needs to be transported via streaming replication (hopefully with
9370 * wal_keep_size set high enough), or some more exotic mechanism like
9371 * polling and copying files from pg_wal with script. We have no knowledge
9372 * of those mechanisms, so it's up to the user to ensure that he gets all
9373 * the required WAL.
9374 *
9375 * We wait until both the last WAL file filled during backup and the
9376 * history file have been archived, and assume that the alphabetic sorting
9377 * property of the WAL files ensures any earlier WAL files are safely
9378 * archived as well.
9379 *
9380 * We wait forever, since archive_command is supposed to work and we
9381 * assume the admin wanted his backup to work completely. If you don't
9382 * wish to wait, then either waitforarchive should be passed in as false,
9383 * or you can set statement_timeout. Also, some notices are issued to
9384 * clue in anyone who might be doing this interactively.
9385 */
9386
9387 if (waitforarchive &&
9388 ((!backup_stopped_in_recovery && XLogArchivingActive()) ||
9389 (backup_stopped_in_recovery && XLogArchivingAlways())))
9390 {
9391 XLByteToPrevSeg(state->stoppoint, _logSegNo, wal_segment_size);
9392 XLogFileName(lastxlogfilename, state->stoptli, _logSegNo,
9394
9395 XLByteToSeg(state->startpoint, _logSegNo, wal_segment_size);
9396 BackupHistoryFileName(histfilename, state->stoptli, _logSegNo,
9397 state->startpoint, wal_segment_size);
9398
9399 seconds_before_warning = 60;
9400 waits = 0;
9401
9402 while (XLogArchiveIsBusy(lastxlogfilename) ||
9403 XLogArchiveIsBusy(histfilename))
9404 {
9406
9407 if (!reported_waiting && waits > 5)
9408 {
9410 (errmsg("base backup done, waiting for required WAL segments to be archived")));
9411 reported_waiting = true;
9412 }
9413
9414 (void) WaitLatch(MyLatch,
9416 1000L,
9417 WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE);
9419
9420 if (++waits >= seconds_before_warning)
9421 {
9422 seconds_before_warning *= 2; /* This wraps in >10 years... */
9424 (errmsg("still waiting for all required WAL segments to be archived (%d seconds elapsed)",
9425 waits),
9426 errhint("Check that your \"archive_command\" is executing properly. "
9427 "You can safely cancel this backup, "
9428 "but the database backup will not be usable without all the WAL segments.")));
9429 }
9430 }
9431
9433 (errmsg("all required WAL segments have been archived")));
9434 }
9435 else if (waitforarchive)
9437 (errmsg("WAL archiving is not enabled; you must ensure that all required WAL segments are copied through other means to complete the backup")));
9438}
#define fprintf(file, fmt, msg)
Definition: cubescan.l:21
#define NOTICE
Definition: elog.h:35
int FreeFile(FILE *file)
Definition: fd.c:2840
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2641
struct Latch * MyLatch
Definition: globals.c:63
void ResetLatch(Latch *latch)
Definition: latch.c:374
int WaitLatch(Latch *latch, int wakeEvents, long timeout, uint32 wait_event_info)
Definition: latch.c:172
#define CHECK_FOR_INTERRUPTS()
Definition: miscadmin.h:123
#define XLOG_BACKUP_END
Definition: pg_control.h:73
#define WL_TIMEOUT
Definition: waiteventset.h:37
#define WL_EXIT_ON_PM_DEATH
Definition: waiteventset.h:39
#define WL_LATCH_SET
Definition: waiteventset.h:34
static void CleanupBackupHistory(void)
Definition: xlog.c:4180
#define XLogArchivingAlways()
Definition: xlog.h:102
static void BackupHistoryFileName(char *fname, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
static void BackupHistoryFilePath(char *path, TimeLineID tli, XLogSegNo logSegNo, XLogRecPtr startpoint, int wal_segsz_bytes)
bool XLogArchiveIsBusy(const char *xlog)
Definition: xlogarchive.c:619
char * build_backup_content(BackupState *state, bool ishistoryfile)
Definition: xlogbackup.c:29

References AllocateFile(), Assert(), BackupHistoryFileName(), BackupHistoryFilePath(), build_backup_content(), CHECK_FOR_INTERRUPTS, CleanupBackupHistory(), ControlFile, ereport, errcode(), errcode_for_file_access(), errhint(), errmsg(), ERROR, fprintf, FreeFile(), XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlData::InsertTimeLineID, XLogCtlData::lastFpwDisableRecPtr, LW_SHARED, LWLockAcquire(), LWLockRelease(), MAXFNAMELEN, MAXPGPATH, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MyLatch, NOTICE, pfree(), RecoveryInProgress(), RequestXLogSwitch(), ResetLatch(), XLogCtlInsert::runningBackups, SESSION_BACKUP_NONE, sessionBackupState, SpinLockAcquire, SpinLockRelease, WaitLatch(), wal_segment_size, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WARNING, WL_EXIT_ON_PM_DEATH, WL_LATCH_SET, WL_TIMEOUT, XLByteToPrevSeg, XLByteToSeg, XLOG_BACKUP_END, XLogArchiveIsBusy(), XLogArchivingActive, XLogArchivingAlways, XLogBeginInsert(), XLogCtl, XLogFileName(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by perform_base_backup(), and pg_backup_stop().

◆ get_backup_status()

SessionBackupState get_backup_status ( void  )

Definition at line 9165 of file xlog.c.

9166{
9167 return sessionBackupState;
9168}

References sessionBackupState.

Referenced by pg_backup_start(), pg_backup_stop(), and SendBaseBackup().

◆ get_sync_bit()

static int get_sync_bit ( int  method)
static

Definition at line 8669 of file xlog.c.

8670{
8671 int o_direct_flag = 0;
8672
8673 /*
8674 * Use O_DIRECT if requested, except in walreceiver process. The WAL
8675 * written by walreceiver is normally read by the startup process soon
8676 * after it's written. Also, walreceiver performs unaligned writes, which
8677 * don't work with O_DIRECT, so it is required for correctness too.
8678 */
8680 o_direct_flag = PG_O_DIRECT;
8681
8682 /* If fsync is disabled, never open in sync mode */
8683 if (!enableFsync)
8684 return o_direct_flag;
8685
8686 switch (method)
8687 {
8688 /*
8689 * enum values for all sync options are defined even if they are
8690 * not supported on the current platform. But if not, they are
8691 * not included in the enum option array, and therefore will never
8692 * be seen here.
8693 */
8697 return o_direct_flag;
8698#ifdef O_SYNC
8700 return O_SYNC | o_direct_flag;
8701#endif
8702#ifdef O_DSYNC
8704 return O_DSYNC | o_direct_flag;
8705#endif
8706 default:
8707 /* can't happen (unless we are out of sync with option array) */
8708 elog(ERROR, "unrecognized \"wal_sync_method\": %d", method);
8709 return 0; /* silence warning */
8710 }
8711}
int io_direct_flags
Definition: fd.c:168
#define IO_DIRECT_WAL
Definition: fd.h:55
#define PG_O_DIRECT
Definition: fd.h:96
bool enableFsync
Definition: globals.c:129
#define AmWalReceiverProcess()
Definition: miscadmin.h:391
#define O_DSYNC
Definition: win32_port.h:342
@ WAL_SYNC_METHOD_OPEN
Definition: xlog.h:26
@ WAL_SYNC_METHOD_FDATASYNC
Definition: xlog.h:25
@ WAL_SYNC_METHOD_FSYNC_WRITETHROUGH
Definition: xlog.h:27
@ WAL_SYNC_METHOD_OPEN_DSYNC
Definition: xlog.h:28
@ WAL_SYNC_METHOD_FSYNC
Definition: xlog.h:24

References AmWalReceiverProcess, elog, enableFsync, ERROR, io_direct_flags, IO_DIRECT_WAL, O_DSYNC, PG_O_DIRECT, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, and WAL_SYNC_METHOD_OPEN_DSYNC.

Referenced by assign_wal_sync_method(), XLogFileInit(), XLogFileInitInternal(), and XLogFileOpen().

◆ GetActiveWalLevelOnStandby()

WalLevel GetActiveWalLevelOnStandby ( void  )

Definition at line 4915 of file xlog.c.

4916{
4917 return ControlFile->wal_level;
4918}

References ControlFile, and ControlFileData::wal_level.

Referenced by CheckLogicalDecodingRequirements().

◆ GetDefaultCharSignedness()

bool GetDefaultCharSignedness ( void  )

Definition at line 4643 of file xlog.c.

4644{
4646}
bool default_char_signedness
Definition: pg_control.h:230

References ControlFile, and ControlFileData::default_char_signedness.

Referenced by CMPTRGM_CHOOSE().

◆ GetFakeLSNForUnloggedRel()

XLogRecPtr GetFakeLSNForUnloggedRel ( void  )

Definition at line 4658 of file xlog.c.

4659{
4661}
static uint64 pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
Definition: atomics.h:520

References pg_atomic_fetch_add_u64(), XLogCtlData::unloggedLSN, and XLogCtl.

Referenced by gistGetFakeLSN().

◆ GetFlushRecPtr()

XLogRecPtr GetFlushRecPtr ( TimeLineID insertTLI)

◆ GetFullPageWriteInfo()

void GetFullPageWriteInfo ( XLogRecPtr RedoRecPtr_p,
bool *  doPageWrites_p 
)

Definition at line 6539 of file xlog.c.

6540{
6541 *RedoRecPtr_p = RedoRecPtr;
6542 *doPageWrites_p = doPageWrites;
6543}
static bool doPageWrites
Definition: xlog.c:288

References doPageWrites, and RedoRecPtr.

Referenced by XLogCheckBufferNeedsBackup(), and XLogInsert().

◆ GetInsertRecPtr()

XLogRecPtr GetInsertRecPtr ( void  )

◆ GetLastImportantRecPtr()

XLogRecPtr GetLastImportantRecPtr ( void  )

Definition at line 6628 of file xlog.c.

6629{
6631 int i;
6632
6633 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
6634 {
6635 XLogRecPtr last_important;
6636
6637 /*
6638 * Need to take a lock to prevent torn reads of the LSN, which are
6639 * possible on some of the supported platforms. WAL insert locks only
6640 * support exclusive mode, so we have to use that.
6641 */
6643 last_important = WALInsertLocks[i].l.lastImportantAt;
6644 LWLockRelease(&WALInsertLocks[i].l.lock);
6645
6646 if (res < last_important)
6647 res = last_important;
6648 }
6649
6650 return res;
6651}
int i
Definition: isn.c:77
XLogRecPtr lastImportantAt
Definition: xlog.c:373
WALInsertLock l
Definition: xlog.c:385
static WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:570
#define NUM_XLOGINSERT_LOCKS
Definition: xlog.c:152

References i, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by BackgroundWriterMain(), CheckArchiveTimeout(), and CreateCheckPoint().

◆ GetLastSegSwitchData()

pg_time_t GetLastSegSwitchData ( XLogRecPtr lastSwitchLSN)

Definition at line 6657 of file xlog.c.

6658{
6659 pg_time_t result;
6660
6661 /* Need WALWriteLock, but shared lock is sufficient */
6662 LWLockAcquire(WALWriteLock, LW_SHARED);
6663 result = XLogCtl->lastSegSwitchTime;
6664 *lastSwitchLSN = XLogCtl->lastSegSwitchLSN;
6665 LWLockRelease(WALWriteLock);
6666
6667 return result;
6668}
pg_time_t lastSegSwitchTime
Definition: xlog.c:468
XLogRecPtr lastSegSwitchLSN
Definition: xlog.c:469

References XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by CheckArchiveTimeout().

◆ GetMockAuthenticationNonce()

char * GetMockAuthenticationNonce ( void  )

Definition at line 4619 of file xlog.c.

4620{
4621 Assert(ControlFile != NULL);
4623}
char mock_authentication_nonce[MOCK_AUTH_NONCE_LEN]
Definition: pg_control.h:237

References Assert(), ControlFile, and ControlFileData::mock_authentication_nonce.

Referenced by scram_mock_salt().

◆ GetOldestRestartPoint()

void GetOldestRestartPoint ( XLogRecPtr oldrecptr,
TimeLineID oldtli 
)

◆ GetRecoveryState()

RecoveryState GetRecoveryState ( void  )

Definition at line 6442 of file xlog.c.

6443{
6444 RecoveryState retval;
6445
6447 retval = XLogCtl->SharedRecoveryState;
6449
6450 return retval;
6451}
RecoveryState
Definition: xlog.h:90

References XLogCtlData::info_lck, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by XLogArchiveCheckDone().

◆ GetRedoRecPtr()

XLogRecPtr GetRedoRecPtr ( void  )

Definition at line 6509 of file xlog.c.

6510{
6511 XLogRecPtr ptr;
6512
6513 /*
6514 * The possibly not up-to-date copy in XLogCtl is enough. Even if we
6515 * grabbed a WAL insertion lock to read the authoritative value in
6516 * Insert->RedoRecPtr, someone might update it just after we've released
6517 * the lock.
6518 */
6520 ptr = XLogCtl->RedoRecPtr;
6522
6523 if (RedoRecPtr < ptr)
6524 RedoRecPtr = ptr;
6525
6526 return RedoRecPtr;
6527}

References XLogCtlData::info_lck, RedoRecPtr, XLogCtlData::RedoRecPtr, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by CheckPointLogicalRewriteHeap(), CheckPointSnapBuild(), MaybeRemoveOldWalSummaries(), nextval_internal(), ReplicationSlotReserveWal(), smgr_bulk_finish(), smgr_bulk_start_smgr(), XLogPageRead(), XLogSaveBufferForHint(), and XLogWrite().

◆ GetSystemIdentifier()

◆ GetWALAvailability()

WALAvailability GetWALAvailability ( XLogRecPtr  targetLSN)

Definition at line 7937 of file xlog.c.

7938{
7939 XLogRecPtr currpos; /* current write LSN */
7940 XLogSegNo currSeg; /* segid of currpos */
7941 XLogSegNo targetSeg; /* segid of targetLSN */
7942 XLogSegNo oldestSeg; /* actual oldest segid */
7943 XLogSegNo oldestSegMaxWalSize; /* oldest segid kept by max_wal_size */
7944 XLogSegNo oldestSlotSeg; /* oldest segid kept by slot */
7945 uint64 keepSegs;
7946
7947 /*
7948 * The slot does not reserve WAL: it is either deactivated or has never been active.
7949 */
7950 if (!XLogRecPtrIsValid(targetLSN))
7951 return WALAVAIL_INVALID_LSN;
7952
7953 /*
7954 * Calculate the oldest segment currently reserved by all slots,
7955 * considering wal_keep_size and max_slot_wal_keep_size. Initialize
7956 * oldestSlotSeg to the current segment.
7957 */
7958 currpos = GetXLogWriteRecPtr();
7959 XLByteToSeg(currpos, oldestSlotSeg, wal_segment_size);
7960 KeepLogSeg(currpos, &oldestSlotSeg);
7961
7962 /*
7963 * Find the oldest extant segment file. We get 1 until checkpoint removes
7964 * the first WAL segment file since startup, which causes the status to be
7965 * wrong under certain abnormal conditions, but that does no actual harm.
7966 */
7967 oldestSeg = XLogGetLastRemovedSegno() + 1;
7968
7969 /* calculate oldest segment by max_wal_size */
7970 XLByteToSeg(currpos, currSeg, wal_segment_size);
7972
7973 if (currSeg > keepSegs)
7974 oldestSegMaxWalSize = currSeg - keepSegs;
7975 else
7976 oldestSegMaxWalSize = 1;
7977
7978 /* the segment we care about */
7979 XLByteToSeg(targetLSN, targetSeg, wal_segment_size);
7980
7981 /*
7982 * No point in returning reserved or extended status values if the
7983 * targetSeg is known to be lost.
7984 */
7985 if (targetSeg >= oldestSlotSeg)
7986 {
7987 /* show "reserved" when targetSeg is within max_wal_size */
7988 if (targetSeg >= oldestSegMaxWalSize)
7989 return WALAVAIL_RESERVED;
7990
7991 /* being retained by slots exceeding max_wal_size */
7992 return WALAVAIL_EXTENDED;
7993 }
7994
7995 /* WAL segments are no longer retained but haven't been removed yet */
7996 if (targetSeg >= oldestSeg)
7997 return WALAVAIL_UNRESERVED;
7998
7999 /* Definitely lost */
8000 return WALAVAIL_REMOVED;
8001}
XLogSegNo XLogGetLastRemovedSegno(void)
Definition: xlog.c:3777
XLogRecPtr GetXLogWriteRecPtr(void)
Definition: xlog.c:9515
@ WALAVAIL_REMOVED
Definition: xlog.h:194
@ WALAVAIL_RESERVED
Definition: xlog.h:190
@ WALAVAIL_UNRESERVED
Definition: xlog.h:193
@ WALAVAIL_EXTENDED
Definition: xlog.h:191
@ WALAVAIL_INVALID_LSN
Definition: xlog.h:189

References ConvertToXSegs, GetXLogWriteRecPtr(), KeepLogSeg(), max_wal_size_mb, wal_segment_size, WALAVAIL_EXTENDED, WALAVAIL_INVALID_LSN, WALAVAIL_REMOVED, WALAVAIL_RESERVED, WALAVAIL_UNRESERVED, XLByteToSeg, XLogGetLastRemovedSegno(), and XLogRecPtrIsValid.

Referenced by pg_get_replication_slots().

◆ GetWALInsertionTimeLine()

TimeLineID GetWALInsertionTimeLine ( void  )

◆ GetWALInsertionTimeLineIfSet()

TimeLineID GetWALInsertionTimeLineIfSet ( void  )

Definition at line 6608 of file xlog.c.

6609{
6610 TimeLineID insertTLI;
6611
6613 insertTLI = XLogCtl->InsertTimeLineID;
6615
6616 return insertTLI;
6617}

References XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by GetLatestLSN().

◆ GetXLogBuffer()

static char * GetXLogBuffer ( XLogRecPtr  ptr,
TimeLineID  tli 
)
static

Definition at line 1638 of file xlog.c.

1639{
1640 int idx;
1641 XLogRecPtr endptr;
1642 static uint64 cachedPage = 0;
1643 static char *cachedPos = NULL;
1644 XLogRecPtr expectedEndPtr;
1645
1646 /*
1647 * Fast path for the common case that we need to access again the same
1648 * page as last time.
1649 */
1650 if (ptr / XLOG_BLCKSZ == cachedPage)
1651 {
1652 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1653 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1654 return cachedPos + ptr % XLOG_BLCKSZ;
1655 }
1656
1657 /*
1658 * The XLog buffer cache is organized so that a page is always loaded to a
1659 * particular buffer. That way we can easily calculate the buffer a given
1660 * page must be loaded into, from the XLogRecPtr alone.
1661 */
1662 idx = XLogRecPtrToBufIdx(ptr);
1663
1664 /*
1665 * See what page is loaded in the buffer at the moment. It could be the
1666 * page we're looking for, or something older. It can't be anything newer
1667 * - that would imply the page we're looking for has already been written
1668 * out to disk and evicted, and the caller is responsible for making sure
1669 * that doesn't happen.
1670 *
1671 * We don't hold a lock while we read the value. If someone is just about
1672 * to initialize or has just initialized the page, it's possible that we
1673 * get InvalidXLogRecPtr. That's ok, we'll grab the mapping lock (in
1674 * AdvanceXLInsertBuffer) and retry if we see anything other than the page
1675 * we're looking for.
1676 */
1677 expectedEndPtr = ptr;
1678 expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
1679
1681 if (expectedEndPtr != endptr)
1682 {
1683 XLogRecPtr initializedUpto;
1684
1685 /*
1686 * Before calling AdvanceXLInsertBuffer(), which can block, let others
1687 * know how far we're finished with inserting the record.
1688 *
1689 * NB: If 'ptr' points to just after the page header, advertise a
1690 * position at the beginning of the page rather than 'ptr' itself. If
1691 * there are no other insertions running, someone might try to flush
1692 * up to our advertised location. If we advertised a position after
1693 * the page header, someone might try to flush the page header, even
1694 * though the page might not actually be initialized yet. As the first
1695 * inserter on the page, we are effectively responsible for making
1696 * sure that it's initialized, before we let insertingAt move past
1697 * the page header.
1698 */
1699 if (ptr % XLOG_BLCKSZ == SizeOfXLogShortPHD &&
1700 XLogSegmentOffset(ptr, wal_segment_size) > XLOG_BLCKSZ)
1701 initializedUpto = ptr - SizeOfXLogShortPHD;
1702 else if (ptr % XLOG_BLCKSZ == SizeOfXLogLongPHD &&
1703 XLogSegmentOffset(ptr, wal_segment_size) < XLOG_BLCKSZ)
1704 initializedUpto = ptr - SizeOfXLogLongPHD;
1705 else
1706 initializedUpto = ptr;
1707
1708 WALInsertLockUpdateInsertingAt(initializedUpto);
1709
1710 AdvanceXLInsertBuffer(ptr, tli, false);
1712
1713 if (expectedEndPtr != endptr)
1714 elog(PANIC, "could not find WAL buffer for %X/%08X",
1715 LSN_FORMAT_ARGS(ptr));
1716 }
1717 else
1718 {
1719 /*
1720 * Make sure the initialization of the page is visible to us, and
1721 * won't arrive later to overwrite the WAL data we write on the page.
1722 */
1724 }
1725
1726 /*
1727 * Found the buffer holding this page. Return a pointer to the right
1728 * offset within the page.
1729 */
1730 cachedPage = ptr / XLOG_BLCKSZ;
1731 cachedPos = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1732
1733 Assert(((XLogPageHeader) cachedPos)->xlp_magic == XLOG_PAGE_MAGIC);
1734 Assert(((XLogPageHeader) cachedPos)->xlp_pageaddr == ptr - (ptr % XLOG_BLCKSZ));
1735
1736 return cachedPos + ptr % XLOG_BLCKSZ;
1737}
Datum idx(PG_FUNCTION_ARGS)
Definition: _int_op.c:262
#define pg_memory_barrier()
Definition: atomics.h:141
static void WALInsertLockUpdateInsertingAt(XLogRecPtr insertingAt)
Definition: xlog.c:1477
static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
Definition: xlog.c:1991

References AdvanceXLInsertBuffer(), Assert(), elog, idx(), LSN_FORMAT_ARGS, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_memory_barrier, SizeOfXLogLongPHD, SizeOfXLogShortPHD, wal_segment_size, WALInsertLockUpdateInsertingAt(), XLogCtlData::xlblocks, XLOG_PAGE_MAGIC, XLogCtl, XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by CopyXLogRecordToWAL(), and CreateOverwriteContrecordRecord().

◆ GetXLogInsertRecPtr()

XLogRecPtr GetXLogInsertRecPtr ( void  )

Definition at line 9499 of file xlog.c.

9500{
9502 uint64 current_bytepos;
9503
9504 SpinLockAcquire(&Insert->insertpos_lck);
9505 current_bytepos = Insert->CurrBytePos;
9506 SpinLockRelease(&Insert->insertpos_lck);
9507
9508 return XLogBytePosToRecPtr(current_bytepos);
9509}

References XLogCtlData::Insert, Insert(), SpinLockAcquire, SpinLockRelease, XLogBytePosToRecPtr(), and XLogCtl.

Referenced by CreateOverwriteContrecordRecord(), gistGetFakeLSN(), logical_begin_heap_rewrite(), pg_current_wal_insert_lsn(), and ReplicationSlotReserveWal().

◆ GetXLogWriteRecPtr()

◆ InitControlFile()

static void InitControlFile ( uint64  sysidentifier,
uint32  data_checksum_version 
)
static

Definition at line 4223 of file xlog.c.

4224{
4225 char mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
4226
4227 /*
4228 * Generate a random nonce. This is used for authentication requests that
4229 * will fail because the user does not exist. The nonce is used to create
4230 * a genuine-looking password challenge for the non-existent user, in lieu
4231 * of an actual stored password.
4232 */
4233 if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
4234 ereport(PANIC,
4235 (errcode(ERRCODE_INTERNAL_ERROR),
4236 errmsg("could not generate secret authorization token")));
4237
4238 memset(ControlFile, 0, sizeof(ControlFileData));
4239 /* Initialize pg_control status fields */
4240 ControlFile->system_identifier = sysidentifier;
4244
4245 /* Set important parameter values for use when replaying WAL */
4254 ControlFile->data_checksum_version = data_checksum_version;
4255}
bool track_commit_timestamp
Definition: commit_ts.c:109
#define MOCK_AUTH_NONCE_LEN
Definition: pg_control.h:28
bool pg_strong_random(void *buf, size_t len)
bool track_commit_timestamp
Definition: pg_control.h:185
bool wal_log_hints
Definition: xlog.c:125
#define FirstNormalUnloggedLSN
Definition: xlogdefs.h:37

References ControlFile, ControlFileData::data_checksum_version, DB_SHUTDOWNED, ereport, errcode(), errmsg(), FirstNormalUnloggedLSN, max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, ControlFileData::MaxConnections, MOCK_AUTH_NONCE_LEN, ControlFileData::mock_authentication_nonce, PANIC, pg_strong_random(), ControlFileData::state, ControlFileData::system_identifier, track_commit_timestamp, ControlFileData::track_commit_timestamp, ControlFileData::unloggedLSN, wal_level, ControlFileData::wal_level, wal_log_hints, and ControlFileData::wal_log_hints.

Referenced by BootStrapXLOG().

◆ InitializeWalConsistencyChecking()

void InitializeWalConsistencyChecking ( void  )

Definition at line 4840 of file xlog.c.

4841{
4843
4845 {
4846 struct config_generic *guc;
4847
4848 guc = find_option("wal_consistency_checking", false, false, ERROR);
4849
4851
4852 set_config_option_ext("wal_consistency_checking",
4854 guc->scontext, guc->source, guc->srole,
4855 GUC_ACTION_SET, true, ERROR, false);
4856
4857 /* checking should not be deferred again */
4859 }
4860}
int set_config_option_ext(const char *name, const char *value, GucContext context, GucSource source, Oid srole, GucAction action, bool changeVal, int elevel, bool is_reload)
Definition: guc.c:3256
struct config_generic * find_option(const char *name, bool create_placeholders, bool skip_errors, int elevel)
Definition: guc.c:1113
@ GUC_ACTION_SET
Definition: guc.h:203
GucContext scontext
Definition: guc_tables.h:263
GucSource source
Definition: guc_tables.h:261
char * wal_consistency_checking_string
Definition: xlog.c:127

References Assert(), check_wal_consistency_checking_deferred, ERROR, find_option(), GUC_ACTION_SET, process_shared_preload_libraries_done, config_generic::scontext, set_config_option_ext(), config_generic::source, config_generic::srole, and wal_consistency_checking_string.

Referenced by PostgresSingleUserMain(), and PostmasterMain().

◆ InstallXLogFileSegment()

static bool InstallXLogFileSegment ( XLogSegNo segno,
char *  tmppath,
bool  find_free,
XLogSegNo  max_segno,
TimeLineID  tli 
)
static

Definition at line 3582 of file xlog.c.

3584{
3585 char path[MAXPGPATH];
3586 struct stat stat_buf;
3587
3588 Assert(tli != 0);
3589
3590 XLogFilePath(path, tli, *segno, wal_segment_size);
3591
3592 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
3594 {
3595 LWLockRelease(ControlFileLock);
3596 return false;
3597 }
3598
3599 if (!find_free)
3600 {
3601 /* Force installation: get rid of any pre-existing segment file */
3602 durable_unlink(path, DEBUG1);
3603 }
3604 else
3605 {
3606 /* Find a free slot to put it in */
3607 while (stat(path, &stat_buf) == 0)
3608 {
3609 if ((*segno) >= max_segno)
3610 {
3611 /* Failed to find a free slot within specified range */
3612 LWLockRelease(ControlFileLock);
3613 return false;
3614 }
3615 (*segno)++;
3616 XLogFilePath(path, tli, *segno, wal_segment_size);
3617 }
3618 }
3619
3620 Assert(access(path, F_OK) != 0 && errno == ENOENT);
3621 if (durable_rename(tmppath, path, LOG) != 0)
3622 {
3623 LWLockRelease(ControlFileLock);
3624 /* durable_rename already emitted log message */
3625 return false;
3626 }
3627
3628 LWLockRelease(ControlFileLock);
3629
3630 return true;
3631}
int durable_unlink(const char *fname, int elevel)
Definition: fd.c:869
short access
Definition: preproc-type.c:36
bool InstallXLogFileSegmentActive
Definition: xlog.c:527
#define stat
Definition: win32_port.h:274

References Assert(), DEBUG1, durable_rename(), durable_unlink(), XLogCtlData::InstallXLogFileSegmentActive, LOG, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), MAXPGPATH, stat, wal_segment_size, XLogCtl, and XLogFilePath().

Referenced by RemoveXlogFile(), XLogFileCopy(), and XLogFileInitInternal().

◆ IsInstallXLogFileSegmentActive()

bool IsInstallXLogFileSegmentActive ( void  )

Definition at line 9564 of file xlog.c.

9565{
9566 bool result;
9567
9568 LWLockAcquire(ControlFileLock, LW_SHARED);
9570 LWLockRelease(ControlFileLock);
9571
9572 return result;
9573}

References XLogCtlData::InstallXLogFileSegmentActive, LW_SHARED, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by XLogFileRead().

◆ issue_xlog_fsync()

void issue_xlog_fsync ( int  fd,
XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 8759 of file xlog.c.

8760{
8761 char *msg = NULL;
8763
8764 Assert(tli != 0);
8765
8766 /*
8767 * Quick exit if fsync is disabled or write() has already synced the WAL
8768 * file.
8769 */
8770 if (!enableFsync ||
8773 return;
8774
8775 /*
8776 * Measure I/O timing to sync the WAL file for pg_stat_io.
8777 */
8779
8780 pgstat_report_wait_start(WAIT_EVENT_WAL_SYNC);
8781 switch (wal_sync_method)
8782 {
8784 if (pg_fsync_no_writethrough(fd) != 0)
8785 msg = _("could not fsync file \"%s\": %m");
8786 break;
8787#ifdef HAVE_FSYNC_WRITETHROUGH
8789 if (pg_fsync_writethrough(fd) != 0)
8790 msg = _("could not fsync write-through file \"%s\": %m");
8791 break;
8792#endif
8794 if (pg_fdatasync(fd) != 0)
8795 msg = _("could not fdatasync file \"%s\": %m");
8796 break;
8799 /* not reachable */
8800 Assert(false);
8801 break;
8802 default:
8803 ereport(PANIC,
8804 errcode(ERRCODE_INVALID_PARAMETER_VALUE),
8805 errmsg_internal("unrecognized \"wal_sync_method\": %d", wal_sync_method));
8806 break;
8807 }
8808
8809 /* PANIC if failed to fsync */
8810 if (msg)
8811 {
8812 char xlogfname[MAXFNAMELEN];
8813 int save_errno = errno;
8814
8815 XLogFileName(xlogfname, tli, segno, wal_segment_size);
8816 errno = save_errno;
8817 ereport(PANIC,
8819 errmsg(msg, xlogfname)));
8820 }
8821
8823
8825 start, 1, 0);
8826}
#define _(x)
Definition: elog.c:91
int pg_fsync_no_writethrough(int fd)
Definition: fd.c:438
int pg_fdatasync(int fd)
Definition: fd.c:477
int pg_fsync_writethrough(int fd)
Definition: fd.c:458
return str start
@ IOOBJECT_WAL
Definition: pgstat.h:279
@ IOCONTEXT_NORMAL
Definition: pgstat.h:289
@ IOOP_FSYNC
Definition: pgstat.h:308
instr_time pgstat_prepare_io_time(bool track_io_guc)
Definition: pgstat_io.c:91
void pgstat_count_io_op_time(IOObject io_object, IOContext io_context, IOOp io_op, instr_time start_time, uint32 cnt, uint64 bytes)
Definition: pgstat_io.c:122
static int fd(const char *x, int i)
Definition: preproc-init.c:105
bool track_wal_io_timing
Definition: xlog.c:139

References _, Assert(), enableFsync, ereport, errcode(), errcode_for_file_access(), errmsg(), errmsg_internal(), fd(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_FSYNC, MAXFNAMELEN, PANIC, pg_fdatasync(), pg_fsync_no_writethrough(), pg_fsync_writethrough(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_FDATASYNC, WAL_SYNC_METHOD_FSYNC, WAL_SYNC_METHOD_FSYNC_WRITETHROUGH, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, and XLogFileName().

Referenced by XLogWalRcvFlush(), and XLogWrite().

◆ KeepLogSeg()

static void KeepLogSeg ( XLogRecPtr  recptr,
XLogSegNo logSegNo 
)
static

Definition at line 8021 of file xlog.c.

8022{
8023 XLogSegNo currSegNo;
8024 XLogSegNo segno;
8025 XLogRecPtr keep;
8026
8027 XLByteToSeg(recptr, currSegNo, wal_segment_size);
8028 segno = currSegNo;
8029
8030 /* Calculate how many segments are kept by slots. */
8032 if (XLogRecPtrIsValid(keep) && keep < recptr)
8033 {
8034 XLByteToSeg(keep, segno, wal_segment_size);
8035
8036 /*
8037 * Account for max_slot_wal_keep_size to avoid keeping more than
8038 * configured. However, don't do that during a binary upgrade: if
8039 * slots were to be invalidated because of this, it would not be
8040 * possible to preserve logical ones during the upgrade.
8041 */
8043 {
8044 uint64 slot_keep_segs;
8045
8046 slot_keep_segs =
8048
8049 if (currSegNo - segno > slot_keep_segs)
8050 segno = currSegNo - slot_keep_segs;
8051 }
8052 }
8053
8054 /*
8055 * If WAL summarization is in use, don't remove WAL that has yet to be
8056 * summarized.
8057 */
8058 keep = GetOldestUnsummarizedLSN(NULL, NULL);
8059 if (XLogRecPtrIsValid(keep))
8060 {
8061 XLogSegNo unsummarized_segno;
8062
8063 XLByteToSeg(keep, unsummarized_segno, wal_segment_size);
8064 if (unsummarized_segno < segno)
8065 segno = unsummarized_segno;
8066 }
8067
8068 /* but, keep at least wal_keep_size if that's set */
8069 if (wal_keep_size_mb > 0)
8070 {
8071 uint64 keep_segs;
8072
8074 if (currSegNo - segno < keep_segs)
8075 {
8076 /* avoid underflow, don't go below 1 */
8077 if (currSegNo <= keep_segs)
8078 segno = 1;
8079 else
8080 segno = currSegNo - keep_segs;
8081 }
8082 }
8083
8084 /* don't delete WAL segments newer than the calculated segment */
8085 if (segno < *logSegNo)
8086 *logSegNo = segno;
8087}
bool IsBinaryUpgrade
Definition: globals.c:121
XLogRecPtr GetOldestUnsummarizedLSN(TimeLineID *tli, bool *lsn_is_exact)
int wal_keep_size_mb
Definition: xlog.c:118
static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void)
Definition: xlog.c:2682
int max_slot_wal_keep_size_mb
Definition: xlog.c:137

References ConvertToXSegs, GetOldestUnsummarizedLSN(), IsBinaryUpgrade, max_slot_wal_keep_size_mb, wal_keep_size_mb, wal_segment_size, XLByteToSeg, XLogGetReplicationSlotMinimumLSN(), and XLogRecPtrIsValid.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and GetWALAvailability().

◆ LocalProcessControlFile()

void LocalProcessControlFile ( bool  reset)

Definition at line 4902 of file xlog.c.

4903{
4904 Assert(reset || ControlFile == NULL);
4907}
void reset(void)
Definition: sql-declare.c:600

References Assert(), ControlFile, palloc(), ReadControlFile(), and reset().

Referenced by PostgresSingleUserMain(), PostmasterMain(), and PostmasterStateMachine().

◆ LocalSetXLogInsertAllowed()

static int LocalSetXLogInsertAllowed ( void  )
static

Definition at line 6494 of file xlog.c.

6495{
6496 int oldXLogAllowed = LocalXLogInsertAllowed;
6497
6499
6500 return oldXLogAllowed;
6501}

References LocalXLogInsertAllowed.

Referenced by CreateCheckPoint(), and StartupXLOG().

◆ LogCheckpointEnd()

static void LogCheckpointEnd ( bool  restartpoint)
static

Definition at line 6753 of file xlog.c.

6754{
6755 long write_msecs,
6756 sync_msecs,
6757 total_msecs,
6758 longest_msecs,
6759 average_msecs;
6760 uint64 average_sync_time;
6761
6763
6766
6769
6770 /* Accumulate checkpoint timing summary data, in milliseconds. */
6771 PendingCheckpointerStats.write_time += write_msecs;
6772 PendingCheckpointerStats.sync_time += sync_msecs;
6773
6774 /*
6775 * All of the published timing statistics are accounted for. Only
6776 * continue if a log message is to be written.
6777 */
6778 if (!log_checkpoints)
6779 return;
6780
6783
6784 /*
6785 * Timing values returned from CheckpointStats are in microseconds.
6786 * Convert to milliseconds for consistent printing.
6787 */
6788 longest_msecs = (long) ((CheckpointStats.ckpt_longest_sync + 999) / 1000);
6789
6790 average_sync_time = 0;
6792 average_sync_time = CheckpointStats.ckpt_agg_sync_time /
6794 average_msecs = (long) ((average_sync_time + 999) / 1000);
6795
6796 /*
6797 * ControlFileLock is not required to see ControlFile->checkPoint and
6798 * ->checkPointCopy here as we are the only updater of those variables at
6799 * this moment.
6800 */
6801 if (restartpoint)
6802 ereport(LOG,
6803 (errmsg("restartpoint complete: wrote %d buffers (%.1f%%), "
6804 "wrote %d SLRU buffers; %d WAL file(s) added, "
6805 "%d removed, %d recycled; write=%ld.%03d s, "
6806 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6807 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6808 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6815 write_msecs / 1000, (int) (write_msecs % 1000),
6816 sync_msecs / 1000, (int) (sync_msecs % 1000),
6817 total_msecs / 1000, (int) (total_msecs % 1000),
6819 longest_msecs / 1000, (int) (longest_msecs % 1000),
6820 average_msecs / 1000, (int) (average_msecs % 1000),
6821 (int) (PrevCheckPointDistance / 1024.0),
6822 (int) (CheckPointDistanceEstimate / 1024.0),
6825 else
6826 ereport(LOG,
6827 (errmsg("checkpoint complete: wrote %d buffers (%.1f%%), "
6828 "wrote %d SLRU buffers; %d WAL file(s) added, "
6829 "%d removed, %d recycled; write=%ld.%03d s, "
6830 "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
6831 "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
6832 "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
6839 write_msecs / 1000, (int) (write_msecs % 1000),
6840 sync_msecs / 1000, (int) (sync_msecs % 1000),
6841 total_msecs / 1000, (int) (total_msecs % 1000),
6843 longest_msecs / 1000, (int) (longest_msecs % 1000),
6844 average_msecs / 1000, (int) (average_msecs % 1000),
6845 (int) (PrevCheckPointDistance / 1024.0),
6846 (int) (CheckPointDistanceEstimate / 1024.0),
6849}
long TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
Definition: timestamp.c:1757
PgStat_CheckpointerStats PendingCheckpointerStats
uint64 ckpt_agg_sync_time
Definition: xlog.h:176
uint64 ckpt_longest_sync
Definition: xlog.h:175
TimestampTz ckpt_end_t
Definition: xlog.h:165
int ckpt_slru_written
Definition: xlog.h:168
int ckpt_sync_rels
Definition: xlog.h:174
PgStat_Counter sync_time
Definition: pgstat.h:265
PgStat_Counter write_time
Definition: pgstat.h:264
static double CheckPointDistanceEstimate
Definition: xlog.c:161
static double PrevCheckPointDistance
Definition: xlog.c:162

References ControlFileData::checkPoint, ControlFileData::checkPointCopy, CheckPointDistanceEstimate, CheckpointStats, CheckpointStatsData::ckpt_agg_sync_time, CheckpointStatsData::ckpt_bufs_written, CheckpointStatsData::ckpt_end_t, CheckpointStatsData::ckpt_longest_sync, CheckpointStatsData::ckpt_segs_added, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, CheckpointStatsData::ckpt_slru_written, CheckpointStatsData::ckpt_start_t, CheckpointStatsData::ckpt_sync_end_t, CheckpointStatsData::ckpt_sync_rels, CheckpointStatsData::ckpt_sync_t, CheckpointStatsData::ckpt_write_t, ControlFile, ereport, errmsg(), GetCurrentTimestamp(), LOG, log_checkpoints, LSN_FORMAT_ARGS, NBuffers, PendingCheckpointerStats, PrevCheckPointDistance, CheckPoint::redo, PgStat_CheckpointerStats::sync_time, TimestampDifferenceMilliseconds(), and PgStat_CheckpointerStats::write_time.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ LogCheckpointStart()

static void LogCheckpointStart ( int  flags,
bool  restartpoint 
)
static

Definition at line 6721 of file xlog.c.

6722{
6723 if (restartpoint)
6724 ereport(LOG,
6725 /* translator: the placeholders show checkpoint options */
6726 (errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
6727 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6728 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6729 (flags & CHECKPOINT_FAST) ? " fast" : "",
6730 (flags & CHECKPOINT_FORCE) ? " force" : "",
6731 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6732 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6733 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6734 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6735 else
6736 ereport(LOG,
6737 /* translator: the placeholders show checkpoint options */
6738 (errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
6739 (flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
6740 (flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
6741 (flags & CHECKPOINT_FAST) ? " fast" : "",
6742 (flags & CHECKPOINT_FORCE) ? " force" : "",
6743 (flags & CHECKPOINT_WAIT) ? " wait" : "",
6744 (flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
6745 (flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
6746 (flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
6747}
#define CHECKPOINT_FLUSH_UNLOGGED
Definition: xlog.h:143
#define CHECKPOINT_CAUSE_XLOG
Definition: xlog.h:148
#define CHECKPOINT_CAUSE_TIME
Definition: xlog.h:149

References CHECKPOINT_CAUSE_TIME, CHECKPOINT_CAUSE_XLOG, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_FLUSH_UNLOGGED, CHECKPOINT_FORCE, CHECKPOINT_IS_SHUTDOWN, CHECKPOINT_WAIT, ereport, errmsg(), and LOG.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ PerformRecoveryXLogAction()

static bool PerformRecoveryXLogAction ( void  )
static

Definition at line 6356 of file xlog.c.

6357{
6358 bool promoted = false;
6359
6360 /*
6361 * Perform a checkpoint to update all our recovery activity to disk.
6362 *
6363 * Note that we write a shutdown checkpoint rather than an on-line one.
6364 * This is not particularly critical, but since we may be assigning a new
6365 * TLI, using a shutdown checkpoint allows us to have the rule that TLI
6366 * only changes in shutdown checkpoints, which allows some extra error
6367 * checking in xlog_redo.
6368 *
6369 * In promotion, only create a lightweight end-of-recovery record instead
6370 * of a full checkpoint. A checkpoint is requested later, after we're
6371 * fully out of recovery mode and already accepting queries.
6372 */
6375 {
6376 promoted = true;
6377
6378 /*
6379 * Insert a special WAL record to mark the end of recovery, since we
6380 * aren't doing a checkpoint. That means that the checkpointer process
6381 * may likely be in the middle of a time-smoothed restartpoint and
6382 * could continue to be for minutes after this. That sounds strange,
6383 * but the effect is roughly the same and it would be stranger to try
6384 * to come out of the restartpoint and then checkpoint. We request a
6385 * checkpoint later anyway, just for safety.
6386 */
6388 }
6389 else
6390 {
6394 }
6395
6396 return promoted;
6397}
static void CreateEndOfRecoveryRecord(void)
Definition: xlog.c:7444
bool PromoteIsTriggered(void)

References ArchiveRecoveryRequested, CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_FAST, CHECKPOINT_WAIT, CreateEndOfRecoveryRecord(), IsUnderPostmaster, PromoteIsTriggered(), and RequestCheckpoint().

Referenced by StartupXLOG().

◆ PreallocXlogFiles()

static void PreallocXlogFiles ( XLogRecPtr  endptr,
TimeLineID  tli 
)
static

Definition at line 3709 of file xlog.c.

3710{
3711 XLogSegNo _logSegNo;
3712 int lf;
3713 bool added;
3714 char path[MAXPGPATH];
3715 uint64 offset;
3716
3718 return; /* unlocked check says no */
3719
3720 XLByteToPrevSeg(endptr, _logSegNo, wal_segment_size);
3721 offset = XLogSegmentOffset(endptr - 1, wal_segment_size);
3722 if (offset >= (uint32) (0.75 * wal_segment_size))
3723 {
3724 _logSegNo++;
3725 lf = XLogFileInitInternal(_logSegNo, tli, &added, path);
3726 if (lf >= 0)
3727 close(lf);
3728 if (added)
3730 }
3731}
static int XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli, bool *added, char *path)
Definition: xlog.c:3211

References CheckpointStats, CheckpointStatsData::ckpt_segs_added, close, XLogCtlData::InstallXLogFileSegmentActive, MAXPGPATH, wal_segment_size, XLByteToPrevSeg, XLogCtl, XLogFileInitInternal(), and XLogSegmentOffset.

Referenced by CreateCheckPoint(), CreateRestartPoint(), and StartupXLOG().

◆ ReachedEndOfBackup()

void ReachedEndOfBackup ( XLogRecPtr  EndRecPtr,
TimeLineID  tli 
)

Definition at line 6319 of file xlog.c.

6320{
6321 /*
6322 * We have reached the end of base backup, as indicated by pg_control. The
6323 * data on disk is now consistent (unless minRecoveryPoint is further
6324 * ahead, which can happen if we crashed during previous recovery). Reset
6325 * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
6326 * make sure we don't allow starting up at an earlier point even if
6327 * recovery is stopped and restarted soon after this.
6328 */
6329 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6330
6331 if (ControlFile->minRecoveryPoint < EndRecPtr)
6332 {
6333 ControlFile->minRecoveryPoint = EndRecPtr;
6335 }
6336
6341
6342 LWLockRelease(ControlFileLock);
6343}
XLogRecPtr backupStartPoint
Definition: pg_control.h:170
bool backupEndRequired
Definition: pg_control.h:172
XLogRecPtr backupEndPoint
Definition: pg_control.h:171

References ControlFileData::backupEndPoint, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFile, InvalidXLogRecPtr, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, and UpdateControlFile().

Referenced by CheckRecoveryConsistency().

◆ ReadControlFile()

static void ReadControlFile ( void  )
static

Definition at line 4368 of file xlog.c.

4369{
4370 pg_crc32c crc;
4371 int fd;
4372 char wal_segsz_str[20];
4373 int r;
4374
4375 /*
4376 * Read data...
4377 */
4379 O_RDWR | PG_BINARY);
4380 if (fd < 0)
4381 ereport(PANIC,
4383 errmsg("could not open file \"%s\": %m",
4385
4386 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_READ);
4387 r = read(fd, ControlFile, sizeof(ControlFileData));
4388 if (r != sizeof(ControlFileData))
4389 {
4390 if (r < 0)
4391 ereport(PANIC,
4393 errmsg("could not read file \"%s\": %m",
4395 else
4396 ereport(PANIC,
4398 errmsg("could not read file \"%s\": read %d of %zu",
4399 XLOG_CONTROL_FILE, r, sizeof(ControlFileData))));
4400 }
4402
4403 close(fd);
4404
4405 /*
4406 * Check for expected pg_control format version. If this is wrong, the
4407 * CRC check will likely fail because we'll be checking the wrong number
4408 * of bytes. Complaining about wrong version will probably be more
4409 * enlightening than complaining about wrong CRC.
4410 */
4411
4413 ereport(FATAL,
4414 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4415 errmsg("database files are incompatible with server"),
4416 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d (0x%08x),"
4417 " but the server was compiled with PG_CONTROL_VERSION %d (0x%08x).",
4420 errhint("This could be a problem of mismatched byte ordering. It looks like you need to initdb.")));
4421
4423 ereport(FATAL,
4424 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4425 errmsg("database files are incompatible with server"),
4426 errdetail("The database cluster was initialized with PG_CONTROL_VERSION %d,"
4427 " but the server was compiled with PG_CONTROL_VERSION %d.",
4429 errhint("It looks like you need to initdb.")));
4430
4431 /* Now check the CRC. */
4435 offsetof(ControlFileData, crc));
4436 FIN_CRC32C(crc);
4437
4438 if (!EQ_CRC32C(crc, ControlFile->crc))
4439 ereport(FATAL,
4440 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4441 errmsg("incorrect checksum in control file")));
4442
4443 /*
4444 * Do compatibility checking immediately. If the database isn't
4445 * compatible with the backend executable, we want to abort before we can
4446 * possibly do any damage.
4447 */
4449 ereport(FATAL,
4450 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4451 errmsg("database files are incompatible with server"),
4452 /* translator: %s is a variable name and %d is its value */
4453 errdetail("The database cluster was initialized with %s %d,"
4454 " but the server was compiled with %s %d.",
4455 "CATALOG_VERSION_NO", ControlFile->catalog_version_no,
4456 "CATALOG_VERSION_NO", CATALOG_VERSION_NO),
4457 errhint("It looks like you need to initdb.")));
4458 if (ControlFile->maxAlign != MAXIMUM_ALIGNOF)
4459 ereport(FATAL,
4460 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4461 errmsg("database files are incompatible with server"),
4462 /* translator: %s is a variable name and %d is its value */
4463 errdetail("The database cluster was initialized with %s %d,"
4464 " but the server was compiled with %s %d.",
4465 "MAXALIGN", ControlFile->maxAlign,
4466 "MAXALIGN", MAXIMUM_ALIGNOF),
4467 errhint("It looks like you need to initdb.")));
4469 ereport(FATAL,
4470 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4471 errmsg("database files are incompatible with server"),
4472 errdetail("The database cluster appears to use a different floating-point number format than the server executable."),
4473 errhint("It looks like you need to initdb.")));
4474 if (ControlFile->blcksz != BLCKSZ)
4475 ereport(FATAL,
4476 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4477 errmsg("database files are incompatible with server"),
4478 /* translator: %s is a variable name and %d is its value */
4479 errdetail("The database cluster was initialized with %s %d,"
4480 " but the server was compiled with %s %d.",
4481 "BLCKSZ", ControlFile->blcksz,
4482 "BLCKSZ", BLCKSZ),
4483 errhint("It looks like you need to recompile or initdb.")));
4484 if (ControlFile->relseg_size != RELSEG_SIZE)
4485 ereport(FATAL,
4486 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4487 errmsg("database files are incompatible with server"),
4488 /* translator: %s is a variable name and %d is its value */
4489 errdetail("The database cluster was initialized with %s %d,"
4490 " but the server was compiled with %s %d.",
4491 "RELSEG_SIZE", ControlFile->relseg_size,
4492 "RELSEG_SIZE", RELSEG_SIZE),
4493 errhint("It looks like you need to recompile or initdb.")));
4495 ereport(FATAL,
4496 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4497 errmsg("database files are incompatible with server"),
4498 /* translator: %s is a variable name and %d is its value */
4499 errdetail("The database cluster was initialized with %s %d,"
4500 " but the server was compiled with %s %d.",
4501 "SLRU_PAGES_PER_SEGMENT", ControlFile->slru_pages_per_segment,
4502 "SLRU_PAGES_PER_SEGMENT", SLRU_PAGES_PER_SEGMENT),
4503 errhint("It looks like you need to recompile or initdb.")));
4504 if (ControlFile->xlog_blcksz != XLOG_BLCKSZ)
4505 ereport(FATAL,
4506 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4507 errmsg("database files are incompatible with server"),
4508 /* translator: %s is a variable name and %d is its value */
4509 errdetail("The database cluster was initialized with %s %d,"
4510 " but the server was compiled with %s %d.",
4511 "XLOG_BLCKSZ", ControlFile->xlog_blcksz,
4512 "XLOG_BLCKSZ", XLOG_BLCKSZ),
4513 errhint("It looks like you need to recompile or initdb.")));
4515 ereport(FATAL,
4516 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4517 errmsg("database files are incompatible with server"),
4518 /* translator: %s is a variable name and %d is its value */
4519 errdetail("The database cluster was initialized with %s %d,"
4520 " but the server was compiled with %s %d.",
4521 "NAMEDATALEN", ControlFile->nameDataLen,
4522 "NAMEDATALEN", NAMEDATALEN),
4523 errhint("It looks like you need to recompile or initdb.")));
4525 ereport(FATAL,
4526 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4527 errmsg("database files are incompatible with server"),
4528 /* translator: %s is a variable name and %d is its value */
4529 errdetail("The database cluster was initialized with %s %d,"
4530 " but the server was compiled with %s %d.",
4531 "INDEX_MAX_KEYS", ControlFile->indexMaxKeys,
4532 "INDEX_MAX_KEYS", INDEX_MAX_KEYS),
4533 errhint("It looks like you need to recompile or initdb.")));
4535 ereport(FATAL,
4536 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4537 errmsg("database files are incompatible with server"),
4538 /* translator: %s is a variable name and %d is its value */
4539 errdetail("The database cluster was initialized with %s %d,"
4540 " but the server was compiled with %s %d.",
4541 "TOAST_MAX_CHUNK_SIZE", ControlFile->toast_max_chunk_size,
4542 "TOAST_MAX_CHUNK_SIZE", (int) TOAST_MAX_CHUNK_SIZE),
4543 errhint("It looks like you need to recompile or initdb.")));
4545 ereport(FATAL,
4546 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
4547 errmsg("database files are incompatible with server"),
4548 /* translator: %s is a variable name and %d is its value */
4549 errdetail("The database cluster was initialized with %s %d,"
4550 " but the server was compiled with %s %d.",
4551 "LOBLKSIZE", ControlFile->loblksize,
4552 "LOBLKSIZE", (int) LOBLKSIZE),
4553 errhint("It looks like you need to recompile or initdb.")));
4554
4555 Assert(ControlFile->float8ByVal); /* vestigial, not worth an error msg */
4556
4558
4560 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4561 errmsg_plural("invalid WAL segment size in control file (%d byte)",
4562 "invalid WAL segment size in control file (%d bytes)",
4565 errdetail("The WAL segment size must be a power of two between 1 MB and 1 GB.")));
4566
4567 snprintf(wal_segsz_str, sizeof(wal_segsz_str), "%d", wal_segment_size);
4568 SetConfigOption("wal_segment_size", wal_segsz_str, PGC_INTERNAL,
4570
4571 /* check and update variables dependent on wal_segment_size */
4573 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4574 /* translator: both %s are GUC names */
4575 errmsg("\"%s\" must be at least twice \"%s\"",
4576 "min_wal_size", "wal_segment_size")));
4577
4579 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
4580 /* translator: both %s are GUC names */
4581 errmsg("\"%s\" must be at least twice \"%s\"",
4582 "max_wal_size", "wal_segment_size")));
4583
4585 (wal_segment_size / XLOG_BLCKSZ * UsableBytesInPage) -
4587
4589
4590 /* Make the initdb settings visible as GUC variables, too */
4591 SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
4593}
#define PG_BINARY
Definition: c.h:1263
#define CATALOG_VERSION_NO
Definition: catversion.h:60
int errmsg_plural(const char *fmt_singular, const char *fmt_plural, unsigned long n,...)
Definition: elog.c:1193
int BasicOpenFile(const char *fileName, int fileFlags)
Definition: fd.c:1086
void SetConfigOption(const char *name, const char *value, GucContext context, GucSource source)
Definition: guc.c:4196
@ PGC_S_DYNAMIC_DEFAULT
Definition: guc.h:114
@ PGC_INTERNAL
Definition: guc.h:73
#define TOAST_MAX_CHUNK_SIZE
Definition: heaptoast.h:84
#define read(a, b, c)
Definition: win32.h:13
#define LOBLKSIZE
Definition: large_object.h:70
#define INDEX_MAX_KEYS
#define NAMEDATALEN
#define SLRU_PAGES_PER_SEGMENT
#define FLOATFORMAT_VALUE
Definition: pg_control.h:201
#define PG_CONTROL_VERSION
Definition: pg_control.h:25
#define EQ_CRC32C(c1, c2)
Definition: pg_crc32c.h:42
uint32 pg_control_version
Definition: pg_control.h:125
uint32 xlog_seg_size
Definition: pg_control.h:213
uint32 nameDataLen
Definition: pg_control.h:215
uint32 slru_pages_per_segment
Definition: pg_control.h:210
uint32 indexMaxKeys
Definition: pg_control.h:216
uint32 relseg_size
Definition: pg_control.h:208
uint32 catalog_version_no
Definition: pg_control.h:126
double floatFormat
Definition: pg_control.h:200
uint32 xlog_blcksz
Definition: pg_control.h:212
uint32 loblksize
Definition: pg_control.h:219
pg_crc32c crc
Definition: pg_control.h:240
uint32 toast_max_chunk_size
Definition: pg_control.h:218
#define UsableBytesInPage
Definition: xlog.c:598
bool DataChecksumsEnabled(void)
Definition: xlog.c:4629
static int UsableBytesInSegment
Definition: xlog.c:607
int min_wal_size_mb
Definition: xlog.c:117
#define XLOG_CONTROL_FILE

References Assert(), BasicOpenFile(), ControlFileData::blcksz, CalculateCheckpointSegments(), CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ConvertToXSegs, ControlFileData::crc, crc, DataChecksumsEnabled(), EQ_CRC32C, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errdetail(), errhint(), errmsg(), errmsg_plural(), ERROR, FATAL, fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, IsValidWalSegSize, ControlFileData::loblksize, LOBLKSIZE, max_wal_size_mb, ControlFileData::maxAlign, min_wal_size_mb, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_VERSION, ControlFileData::pg_control_version, PGC_INTERNAL, PGC_S_DYNAMIC_DEFAULT, pgstat_report_wait_end(), pgstat_report_wait_start(), read, ControlFileData::relseg_size, SetConfigOption(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, snprintf, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG() and LocalProcessControlFile().

◆ RecoveryInProgress()

bool RecoveryInProgress ( void  )

Definition at line 6406 of file xlog.c.

6407{
6408 /*
6409 * We check shared state each time only until we leave recovery mode. We
6410 * can't re-enter recovery, so there's no need to keep checking after the
6411 * shared variable has once been seen false.
6412 */
6414 return false;
6415 else
6416 {
6417 /*
6418 * use volatile pointer to make sure we make a fresh read of the
6419 * shared variable.
6420 */
6421 volatile XLogCtlData *xlogctl = XLogCtl;
6422
6424
6425 /*
6426 * Note: We don't need a memory barrier when we're still in recovery.
6427 * We might exit recovery immediately after return, so the caller
6428 * can't rely on 'true' meaning that we're still in recovery anyway.
6429 */
6430
6432 }
6433}
static bool LocalRecoveryInProgress
Definition: xlog.c:226

References LocalRecoveryInProgress, RECOVERY_STATE_DONE, XLogCtlData::SharedRecoveryState, and XLogCtl.

Referenced by amcheck_index_mainfork_expected(), attribute_statistics_update(), BackgroundWriterMain(), BeginReportingGUCOptions(), brin_desummarize_range(), brin_summarize_range(), CanInvalidateIdleSlot(), check_transaction_isolation(), check_transaction_read_only(), CheckArchiveTimeout(), CheckLogicalDecodingRequirements(), CheckpointerMain(), ComputeXidHorizons(), CreateCheckPoint(), CreateDecodingContext(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_start(), do_pg_backup_stop(), error_commit_ts_disabled(), ExecCheckpoint(), get_relation_info(), GetCurrentLSN(), GetLatestLSN(), GetNewMultiXactId(), GetNewObjectId(), GetNewTransactionId(), GetOldestActiveTransactionId(), GetOldestSafeDecodingTransactionId(), GetRunningTransactionData(), GetSerializableTransactionSnapshot(), GetSerializableTransactionSnapshotInt(), GetSnapshotData(), GetStrictOldestNonRemovableTransactionId(), gin_clean_pending_list(), GlobalVisHorizonKindForRel(), heap_force_common(), heap_page_prune_opt(), IdentifySystem(), InitTempTableNamespace(), InitWalSender(), IsCheckpointOnSchedule(), LockAcquireExtended(), logical_read_xlog_page(), MaintainLatestCompletedXid(), MarkBufferDirtyHint(), perform_base_backup(), pg_clear_attribute_stats(), pg_create_restore_point(), pg_current_wal_flush_lsn(), pg_current_wal_insert_lsn(), pg_current_wal_lsn(), pg_get_sequence_data(), pg_get_wal_replay_pause_state(), pg_is_in_recovery(), pg_is_wal_replay_paused(), pg_log_standby_snapshot(), pg_logical_slot_get_changes_guts(), pg_promote(), pg_replication_slot_advance(), pg_sequence_last_value(), pg_switch_wal(), pg_sync_replication_slots(), pg_wal_replay_pause(), pg_wal_replay_resume(), pg_walfile_name(), pg_walfile_name_offset(), PhysicalWakeupLogicalWalSnd(), PrepareRedoAdd(), PrepareRedoRemoveFull(), PreventCommandDuringRecovery(), ProcessStandbyPSRequestMessage(), ProcSleep(), read_local_xlog_page_guts(), ReadReplicationSlot(), recovery_create_dbdir(), 
relation_statistics_update(), ReplicationSlotAlter(), ReplicationSlotCreate(), ReplicationSlotDrop(), ReplicationSlotReserveWal(), replorigin_check_prerequisites(), ReportChangedGUCOptions(), sendDir(), SerialSetActiveSerXmin(), show_in_hot_standby(), ShutdownXLOG(), SnapBuildWaitSnapshot(), StandbySlotsHaveCaughtup(), StartLogicalReplication(), StartReplication(), StartTransaction(), TransactionIdIsInProgress(), TruncateMultiXact(), UpdateFullPageWrites(), verify_heapam(), WaitForLSN(), WALReadFromBuffers(), WalReceiverMain(), WalSndWaitForWal(), xlog_decode(), XLogBackgroundFlush(), XLogFlush(), XLogInsertAllowed(), and XLogSendPhysical().

◆ RecoveryRestartPoint()

static void RecoveryRestartPoint ( const CheckPoint checkPoint,
XLogReaderState record 
)
static

Definition at line 7619 of file xlog.c.

7620{
7621 /*
7622 * Also refrain from creating a restartpoint if we have seen any
7623 * references to non-existent pages. Restarting recovery from the
7624 * restartpoint would not see the references, so we would lose the
7625 * cross-check that the pages belonged to a relation that was dropped
7626 * later.
7627 */
7629 {
7630 elog(DEBUG2,
7631 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
7632 LSN_FORMAT_ARGS(checkPoint->redo));
7633 return;
7634 }
7635
7636 /*
7637 * Copy the checkpoint record to shared memory, so that checkpointer can
7638 * work out the next time it wants to perform a restartpoint.
7639 */
7643 XLogCtl->lastCheckPoint = *checkPoint;
7645}
XLogRecPtr EndRecPtr
Definition: xlogreader.h:206
XLogRecPtr ReadRecPtr
Definition: xlogreader.h:205
bool XLogHaveInvalidPages(void)
Definition: xlogutils.c:224

References DEBUG2, elog, XLogReaderState::EndRecPtr, XLogCtlData::info_lck, XLogCtlData::lastCheckPoint, XLogCtlData::lastCheckPointEndPtr, XLogCtlData::lastCheckPointRecPtr, LSN_FORMAT_ARGS, XLogReaderState::ReadRecPtr, CheckPoint::redo, SpinLockAcquire, SpinLockRelease, XLogCtl, and XLogHaveInvalidPages().

Referenced by xlog_redo().

◆ register_persistent_abort_backup_handler()

void register_persistent_abort_backup_handler ( void  )

Definition at line 9485 of file xlog.c.

9486{
9487 static bool already_done = false;
9488
9489 if (already_done)
9490 return;
9492 already_done = true;
9493}
void before_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:337

References before_shmem_exit(), BoolGetDatum(), and do_pg_abort_backup().

Referenced by pg_backup_start().

◆ RemoveNonParentXlogFiles()

void RemoveNonParentXlogFiles ( XLogRecPtr  switchpoint,
TimeLineID  newTLI 
)

Definition at line 3959 of file xlog.c.

3960{
3961 DIR *xldir;
3962 struct dirent *xlde;
3963 char switchseg[MAXFNAMELEN];
3964 XLogSegNo endLogSegNo;
3965 XLogSegNo switchLogSegNo;
3966 XLogSegNo recycleSegNo;
3967
3968 /*
3969 * Initialize info about where to begin the work. This will recycle,
3970 * somewhat arbitrarily, 10 future segments.
3971 */
3972 XLByteToPrevSeg(switchpoint, switchLogSegNo, wal_segment_size);
3973 XLByteToSeg(switchpoint, endLogSegNo, wal_segment_size);
3974 recycleSegNo = endLogSegNo + 10;
3975
3976 /*
3977 * Construct a filename of the last segment to be kept.
3978 */
3979 XLogFileName(switchseg, newTLI, switchLogSegNo, wal_segment_size);
3980
3981 elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
3982 switchseg);
3983
3984 xldir = AllocateDir(XLOGDIR);
3985
3986 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3987 {
3988 /* Ignore files that are not XLOG segments */
3989 if (!IsXLogFileName(xlde->d_name))
3990 continue;
3991
3992 /*
3993 * Remove files that are on a timeline older than the new one we're
3994 * switching to, but with a segment number >= the first segment on the
3995 * new timeline.
3996 */
3997 if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
3998 strcmp(xlde->d_name + 8, switchseg + 8) > 0)
3999 {
4000 /*
4001 * If the file has already been marked as .ready, however, don't
4002 * remove it yet. It should be OK to remove it - files that are
4003 * not part of our timeline history are not required for recovery
4004 * - but it seems safer to let them be archived and removed later.
4005 */
4006 if (!XLogArchiveIsReady(xlde->d_name))
4007 RemoveXlogFile(xlde, recycleSegNo, &endLogSegNo, newTLI);
4008 }
4009 }
4010
4011 FreeDir(xldir);
4012}
static void RemoveXlogFile(const struct dirent *segment_de, XLogSegNo recycleSegNo, XLogSegNo *endlogSegNo, TimeLineID insertTLI)
Definition: xlog.c:4028
static bool IsXLogFileName(const char *fname)
bool XLogArchiveIsReady(const char *xlog)
Definition: xlogarchive.c:694

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveIsReady(), XLOGDIR, and XLogFileName().

Referenced by ApplyWalRecord() and CleanupAfterArchiveRecovery().

◆ RemoveOldXlogFiles()

static void RemoveOldXlogFiles ( XLogSegNo  segno,
XLogRecPtr  lastredoptr,
XLogRecPtr  endptr,
TimeLineID  insertTLI 
)
static

Definition at line 3884 of file xlog.c.

3886{
3887 DIR *xldir;
3888 struct dirent *xlde;
3889 char lastoff[MAXFNAMELEN];
3890 XLogSegNo endlogSegNo;
3891 XLogSegNo recycleSegNo;
3892
3893 /* Initialize info about where to try to recycle to */
3894 XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
3895 recycleSegNo = XLOGfileslop(lastredoptr);
3896
3897 /*
3898 * Construct a filename of the last segment to be kept. The timeline ID
3899 * doesn't matter, we ignore that in the comparison. (During recovery,
3900 * InsertTimeLineID isn't set, so we can't use that.)
3901 */
3902 XLogFileName(lastoff, 0, segno, wal_segment_size);
3903
3904 elog(DEBUG2, "attempting to remove WAL segments older than log file %s",
3905 lastoff);
3906
3907 xldir = AllocateDir(XLOGDIR);
3908
3909 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3910 {
3911 /* Ignore files that are not XLOG segments */
3912 if (!IsXLogFileName(xlde->d_name) &&
3914 continue;
3915
3916 /*
3917 * We ignore the timeline part of the XLOG segment identifiers in
3918 * deciding whether a segment is still needed. This ensures that we
3919 * won't prematurely remove a segment from a parent timeline. We could
3920 * probably be a little more proactive about removing segments of
3921 * non-parent timelines, but that would be a whole lot more
3922 * complicated.
3923 *
3924 * We use the alphanumeric sorting property of the filenames to decide
3925 * which ones are earlier than the lastoff segment.
3926 */
3927 if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
3928 {
3929 if (XLogArchiveCheckDone(xlde->d_name))
3930 {
3931 /* Update the last removed location in shared memory first */
3933
3934 RemoveXlogFile(xlde, recycleSegNo, &endlogSegNo, insertTLI);
3935 }
3936 }
3937 }
3938
3939 FreeDir(xldir);
3940}
static XLogSegNo XLOGfileslop(XLogRecPtr lastredoptr)
Definition: xlog.c:2233
static void UpdateLastRemovedPtr(char *filename)
Definition: xlog.c:3831
static bool IsPartialXLogFileName(const char *fname)

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), IsPartialXLogFileName(), IsXLogFileName(), MAXFNAMELEN, ReadDir(), RemoveXlogFile(), UpdateLastRemovedPtr(), wal_segment_size, XLByteToSeg, XLogArchiveCheckDone(), XLOGDIR, XLogFileName(), and XLOGfileslop().

Referenced by CreateCheckPoint() and CreateRestartPoint().

◆ RemoveTempXlogFiles()

static void RemoveTempXlogFiles ( void  )
static

Definition at line 3851 of file xlog.c.

3852{
3853 DIR *xldir;
3854 struct dirent *xlde;
3855
3856 elog(DEBUG2, "removing all temporary WAL segments");
3857
3858 xldir = AllocateDir(XLOGDIR);
3859 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3860 {
3861 char path[MAXPGPATH];
3862
3863 if (strncmp(xlde->d_name, "xlogtemp.", 9) != 0)
3864 continue;
3865
3866 snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
3867 unlink(path);
3868 elog(DEBUG2, "removed temporary WAL segment \"%s\"", path);
3869 }
3870 FreeDir(xldir);
3871}

References AllocateDir(), dirent::d_name, DEBUG2, elog, FreeDir(), MAXPGPATH, ReadDir(), snprintf, and XLOGDIR.

Referenced by StartupXLOG().

◆ RemoveXlogFile()

static void RemoveXlogFile ( const struct dirent segment_de,
XLogSegNo  recycleSegNo,
XLogSegNo endlogSegNo,
TimeLineID  insertTLI 
)
static

Definition at line 4028 of file xlog.c.

4031{
4032 char path[MAXPGPATH];
4033#ifdef WIN32
4034 char newpath[MAXPGPATH];
4035#endif
4036 const char *segname = segment_de->d_name;
4037
4038 snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
4039
4040 /*
4041 * Before deleting the file, see if it can be recycled as a future log
4042 * segment. Only recycle normal files, because we don't want to recycle
4043 * symbolic links pointing to a separate archive directory.
4044 */
4045 if (wal_recycle &&
4046 *endlogSegNo <= recycleSegNo &&
4047 XLogCtl->InstallXLogFileSegmentActive && /* callee rechecks this */
4048 get_dirent_type(path, segment_de, false, DEBUG2) == PGFILETYPE_REG &&
4049 InstallXLogFileSegment(endlogSegNo, path,
4050 true, recycleSegNo, insertTLI))
4051 {
4053 (errmsg_internal("recycled write-ahead log file \"%s\"",
4054 segname)));
4056 /* Needn't recheck that slot on future iterations */
4057 (*endlogSegNo)++;
4058 }
4059 else
4060 {
4061 /* No need for any more future segments, or recycling failed ... */
4062 int rc;
4063
4065 (errmsg_internal("removing write-ahead log file \"%s\"",
4066 segname)));
4067
4068#ifdef WIN32
4069
4070 /*
4071 * On Windows, if another process (e.g. another backend) holds the file
4072 * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
4073 * will still show up in directory listings until the last handle is
4074 * closed. To avoid confusing the lingering deleted file for a live
4075 * WAL file that needs to be archived, rename it before deleting it.
4076 *
4077 * If another process holds the file open without FILE_SHARE_DELETE
4078 * flag, rename will fail. We'll try again at the next checkpoint.
4079 */
4080 snprintf(newpath, MAXPGPATH, "%s.deleted", path);
4081 if (rename(path, newpath) != 0)
4082 {
4083 ereport(LOG,
4085 errmsg("could not rename file \"%s\": %m",
4086 path)));
4087 return;
4088 }
4089 rc = durable_unlink(newpath, LOG);
4090#else
4091 rc = durable_unlink(path, LOG);
4092#endif
4093 if (rc != 0)
4094 {
4095 /* Message already logged by durable_unlink() */
4096 return;
4097 }
4099 }
4100
4101 XLogArchiveCleanup(segname);
4102}
@ PGFILETYPE_REG
Definition: file_utils.h:22
static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath, bool find_free, XLogSegNo max_segno, TimeLineID tli)
Definition: xlog.c:3582
bool wal_recycle
Definition: xlog.c:130

References CheckpointStats, CheckpointStatsData::ckpt_segs_recycled, CheckpointStatsData::ckpt_segs_removed, dirent::d_name, DEBUG2, durable_unlink(), ereport, errcode_for_file_access(), errmsg(), errmsg_internal(), get_dirent_type(), InstallXLogFileSegment(), XLogCtlData::InstallXLogFileSegmentActive, LOG, MAXPGPATH, PGFILETYPE_REG, snprintf, wal_recycle, XLogArchiveCleanup(), XLogCtl, and XLOGDIR.

Referenced by RemoveNonParentXlogFiles() and RemoveOldXlogFiles().

◆ RequestXLogSwitch()

XLogRecPtr RequestXLogSwitch ( bool  mark_unimportant)

Definition at line 8130 of file xlog.c.

8131{
8132 XLogRecPtr RecPtr;
8133
8134 /* XLOG SWITCH has no data */
8136
8137 if (mark_unimportant)
8139 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
8140
8141 return RecPtr;
8142}
#define XLOG_SWITCH
Definition: pg_control.h:72
#define XLOG_MARK_UNIMPORTANT
Definition: xlog.h:155
void XLogSetRecordFlags(uint8 flags)
Definition: xloginsert.c:460

References XLOG_MARK_UNIMPORTANT, XLOG_SWITCH, XLogBeginInsert(), XLogInsert(), and XLogSetRecordFlags().

Referenced by CheckArchiveTimeout(), do_pg_backup_start(), do_pg_backup_stop(), pg_switch_wal(), and ShutdownXLOG().

◆ ReserveXLogInsertLocation()

static pg_attribute_always_inline void ReserveXLogInsertLocation ( int  size,
XLogRecPtr StartPos,
XLogRecPtr EndPos,
XLogRecPtr PrevPtr 
)
static

Definition at line 1114 of file xlog.c.

1116{
1118 uint64 startbytepos;
1119 uint64 endbytepos;
1120 uint64 prevbytepos;
1121
1122 size = MAXALIGN(size);
1123
1124 /* All (non xlog-switch) records should contain data. */
1125 Assert(size > SizeOfXLogRecord);
1126
1127 /*
1128 * The duration the spinlock needs to be held is minimized by minimizing
1129 * the calculations that have to be done while holding the lock. The
1130 * current tip of reserved WAL is kept in CurrBytePos, as a byte position
1131 * that only counts "usable" bytes in WAL, that is, it excludes all WAL
1132 * page headers. The mapping between "usable" byte positions and physical
1133 * positions (XLogRecPtrs) can be done outside the locked region, and
1134 * because the usable byte position doesn't include any headers, reserving
1135 * X bytes from WAL is almost as simple as "CurrBytePos += X".
1136 */
1137 SpinLockAcquire(&Insert->insertpos_lck);
1138
1139 startbytepos = Insert->CurrBytePos;
1140 endbytepos = startbytepos + size;
1141 prevbytepos = Insert->PrevBytePos;
1142 Insert->CurrBytePos = endbytepos;
1143 Insert->PrevBytePos = startbytepos;
1144
1145 SpinLockRelease(&Insert->insertpos_lck);
1146
1147 *StartPos = XLogBytePosToRecPtr(startbytepos);
1148 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1149 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1150
1151 /*
1152 * Check that the conversions between "usable byte positions" and
1153 * XLogRecPtrs work consistently in both directions.
1154 */
1155 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1156 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1157 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1158}
#define MAXALIGN(LEN)
Definition: c.h:815
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos)
Definition: xlog.c:1904
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr)
Definition: xlog.c:1947

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, and XLogRecPtrToBytePos().

Referenced by XLogInsertRecord().

◆ ReserveXLogSwitch()

static bool ReserveXLogSwitch ( XLogRecPtr * StartPos,
XLogRecPtr * EndPos,
XLogRecPtr * PrevPtr 
)
static

Definition at line 1170 of file xlog.c.

/*
 * ReserveXLogSwitch: reserve WAL space for an xlog-switch record.
 *
 * The reservation is one MAXALIGN'd record header plus whatever is left of
 * the current segment, so that *EndPos lands on a segment boundary. If the
 * insert position is already at a segment boundary nothing is reserved:
 * *StartPos and *EndPos are both set to the current position and false is
 * returned. Otherwise *StartPos, *EndPos and *PrevPtr are filled in, the
 * shared CurrBytePos/PrevBytePos are advanced, and true is returned.
 */
1171{
1172 XLogCtlInsert *Insert = &XLogCtl->Insert;
1173 uint64 startbytepos;
1174 uint64 endbytepos;
1175 uint64 prevbytepos;
1176 uint32 size = MAXALIGN(SizeOfXLogRecord);
1177 XLogRecPtr ptr;
1178 uint32 segleft;
1179
1180 /*
1181 * These calculations are a bit heavy-weight to be done while holding a
1182 * spinlock, but since we're holding all the WAL insertion locks, there
1183 * are no other inserters competing for it. GetXLogInsertRecPtr() does
1184 * compete for it, but that's not called very frequently.
1185 */
1186 SpinLockAcquire(&Insert->insertpos_lck);
1187
1188 startbytepos = Insert->CurrBytePos;
1189
1190 ptr = XLogBytePosToEndRecPtr(startbytepos);
1191 if (XLogSegmentOffset(ptr, wal_segment_size) == 0)
1192 {
1193 SpinLockRelease(&Insert->insertpos_lck);
1194 *EndPos = *StartPos = ptr;
1195 return false;
1196 }
1197
1198 endbytepos = startbytepos + size;
1199 prevbytepos = Insert->PrevBytePos;
1200
1201 *StartPos = XLogBytePosToRecPtr(startbytepos);
1202 *EndPos = XLogBytePosToEndRecPtr(endbytepos);
1203
1204 segleft = wal_segment_size - XLogSegmentOffset(*EndPos, wal_segment_size);
1205 if (segleft != wal_segment_size)
1206 {
1207 /* consume the rest of the segment */
1208 *EndPos += segleft;
1209 endbytepos = XLogRecPtrToBytePos(*EndPos);
1210 }
1211 Insert->CurrBytePos = endbytepos;
1212 Insert->PrevBytePos = startbytepos;
1213
1214 SpinLockRelease(&Insert->insertpos_lck);
1215
1216 *PrevPtr = XLogBytePosToRecPtr(prevbytepos);
1217
1218 Assert(XLogSegmentOffset(*EndPos, wal_segment_size) == 0);
1219 Assert(XLogRecPtrToBytePos(*EndPos) == endbytepos);
1220 Assert(XLogRecPtrToBytePos(*StartPos) == startbytepos);
1221 Assert(XLogRecPtrToBytePos(*PrevPtr) == prevbytepos);
1222
1223 return true;
1224}

References Assert(), XLogCtlData::Insert, Insert(), MAXALIGN, SizeOfXLogRecord, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCtl, XLogRecPtrToBytePos(), and XLogSegmentOffset.

Referenced by XLogInsertRecord().

◆ ResetInstallXLogFileSegmentActive()

void ResetInstallXLogFileSegmentActive ( void  )

Definition at line 9556 of file xlog.c.

/*
 * Clear XLogCtl->InstallXLogFileSegmentActive. The store is made while
 * holding ControlFileLock in exclusive mode.
 */
9557{
9558 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9559 XLogCtl->InstallXLogFileSegmentActive = false;
9560 LWLockRelease(ControlFileLock);
9561}

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by WaitForWALToBecomeAvailable(), and XLogShutdownWalRcv().

◆ SetInstallXLogFileSegmentActive()

void SetInstallXLogFileSegmentActive ( void  )

Definition at line 9547 of file xlog.c.

/*
 * Set XLogCtl->InstallXLogFileSegmentActive. The store is made while
 * holding ControlFileLock in exclusive mode.
 */
9548{
9549 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
9550 XLogCtl->InstallXLogFileSegmentActive = true;
9551 LWLockRelease(ControlFileLock);
9552}

References XLogCtlData::InstallXLogFileSegmentActive, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), and XLogCtl.

Referenced by BootStrapXLOG(), StartupXLOG(), and WaitForWALToBecomeAvailable().

◆ SetWalWriterSleeping()

void SetWalWriterSleeping ( bool  sleeping)

Definition at line 9579 of file xlog.c.

/*
 * Record in shared memory whether the WAL writer is sleeping. The flag is
 * written under XLogCtl->info_lck.
 */
9580{
9581 SpinLockAcquire(&XLogCtl->info_lck);
9582 XLogCtl->WalWriterSleeping = sleeping;
9583 SpinLockRelease(&XLogCtl->info_lck);
9584}
bool WalWriterSleeping
Definition: xlog.c:534

References XLogCtlData::info_lck, SpinLockAcquire, SpinLockRelease, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by WalWriterMain().

◆ show_archive_command()

const char * show_archive_command ( void  )

Definition at line 4866 of file xlog.c.

/*
 * Returns XLogArchiveCommand while XLogArchivingActive() is true, and the
 * literal "(disabled)" otherwise.
 */
4867{
4868 if (XLogArchivingActive())
4869 return XLogArchiveCommand;
4870 else
4871 return "(disabled)";
4872}
char * XLogArchiveCommand
Definition: xlog.c:122

References XLogArchiveCommand, and XLogArchivingActive.

◆ show_in_hot_standby()

const char * show_in_hot_standby ( void  )

Definition at line 4878 of file xlog.c.

/*
 * Returns "on" while RecoveryInProgress() reports true, "off" otherwise.
 */
4879{
4880 /*
4881 * We display the actual state based on shared memory, so that this GUC
4882 * reports up-to-date state if examined intra-query. The underlying
4883 * variable (in_hot_standby_guc) changes only when we transmit a new value
4884 * to the client.
4885 */
4886 return RecoveryInProgress() ? "on" : "off";
4887}

References RecoveryInProgress().

◆ ShutdownXLOG()

void ShutdownXLOG ( int  code,
Datum  arg 
)

Definition at line 6674 of file xlog.c.

/*
 * ShutdownXLOG: final WAL actions at shutdown.
 *
 * Moves WAL senders to the stopping state and waits for them, then writes a
 * shutdown restartpoint (when still in recovery) or a shutdown checkpoint.
 * In the latter case, if archiving is active, the current WAL file is
 * switched first so that the remaining records can be archived.
 *
 * The code/arg parameters are not used by the body.
 */
6675{
6676 /*
6677 * We should have an aux process resource owner to use, and we should not
6678 * be in a transaction that's installed some other resowner.
6679 */
6680 Assert(AuxProcessResourceOwner != NULL);
6681 Assert(CurrentResourceOwner == NULL ||
6682 CurrentResourceOwner == AuxProcessResourceOwner);
6683 CurrentResourceOwner = AuxProcessResourceOwner;
6684
6685 /* Don't be chatty in standalone mode */
6686 ereport(IsPostmasterEnvironment ? LOG : NOTICE,
6687 (errmsg("shutting down")));
6688
6689 /*
6690 * Signal walsenders to move to stopping state.
6691 */
6692 WalSndInitStopping();
6693
6694 /*
6695 * Wait for WAL senders to be in stopping state. This prevents commands
6696 * from writing new WAL.
6697 */
6698 WalSndWaitStopping();
6699
6700 if (RecoveryInProgress())
6701 CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
6702 else
6703 {
6704 /*
6705 * If archiving is enabled, rotate the last XLOG file so that all the
6706 * remaining records are archived (postmaster wakes up the archiver
6707 * process one more time at the end of shutdown). The checkpoint
6708 * record will go to the next XLOG file and won't be archived (yet).
6709 */
6710 if (XLogArchivingActive())
6711 RequestXLogSwitch(false);
6712
6713 CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
6714 }
6715}
bool IsPostmasterEnvironment
Definition: globals.c:119
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
ResourceOwner AuxProcessResourceOwner
Definition: resowner.c:176
void WalSndInitStopping(void)
Definition: walsender.c:3872
void WalSndWaitStopping(void)
Definition: walsender.c:3898
bool CreateRestartPoint(int flags)
Definition: xlog.c:7659
bool CreateCheckPoint(int flags)
Definition: xlog.c:6961

References Assert(), AuxProcessResourceOwner, CHECKPOINT_FAST, CHECKPOINT_IS_SHUTDOWN, CreateCheckPoint(), CreateRestartPoint(), CurrentResourceOwner, ereport, errmsg(), IsPostmasterEnvironment, LOG, NOTICE, RecoveryInProgress(), RequestXLogSwitch(), WalSndInitStopping(), WalSndWaitStopping(), and XLogArchivingActive.

Referenced by CheckpointerMain(), and InitPostgres().

◆ StartupXLOG()

void StartupXLOG ( void  )

Definition at line 5483 of file xlog.c.

/*
 * StartupXLOG: bring the WAL subsystem up at server start.
 *
 * Reports the state recorded in the control file, runs WAL recovery when
 * InRecovery is set, then positions WAL insertion at EndOfLog and performs
 * the end-of-recovery actions.
 *
 * NOTE(review): gaps in the embedded line numbers (5485, 5496-5497, 5506,
 * ...) show that this listing omits some source lines, so several
 * statements and conditions below are incomplete as rendered.
 */
5484{
5486 CheckPoint checkPoint;
5487 bool wasShutdown;
5488 bool didCrash;
5489 bool haveTblspcMap;
5490 bool haveBackupLabel;
5491 XLogRecPtr EndOfLog;
5492 TimeLineID EndOfLogTLI;
5493 TimeLineID newTLI;
5494 bool performedWalRecovery;
5495 EndOfWalRecoveryInfo *endOfRecoveryInfo;
5498 TransactionId oldestActiveXID;
5499 bool promoted = false;
5500 char timebuf[128];
5501
5502 /*
5503 * We should have an aux process resource owner to use, and we should not
5504 * be in a transaction that's installed some other resowner.
5505 */
5507 Assert(CurrentResourceOwner == NULL ||
5510
5511 /*
5512 * Check that contents look valid.
5513 */
5515 ereport(FATAL,
5517 errmsg("control file contains invalid checkpoint location")));
5518
5519 switch (ControlFile->state)
5520 {
5521 case DB_SHUTDOWNED:
5522
5523 /*
5524 * This is the expected case, so don't be chatty in standalone
5525 * mode
5526 */
5528 (errmsg("database system was shut down at %s",
5530 timebuf, sizeof(timebuf)))));
5531 break;
5532
5534 ereport(LOG,
5535 (errmsg("database system was shut down in recovery at %s",
5537 timebuf, sizeof(timebuf)))));
5538 break;
5539
5540 case DB_SHUTDOWNING:
5541 ereport(LOG,
5542 (errmsg("database system shutdown was interrupted; last known up at %s",
5544 timebuf, sizeof(timebuf)))));
5545 break;
5546
5548 ereport(LOG,
5549 (errmsg("database system was interrupted while in recovery at %s",
5551 timebuf, sizeof(timebuf))),
5552 errhint("This probably means that some data is corrupted and"
5553 " you will have to use the last backup for recovery.")));
5554 break;
5555
5557 ereport(LOG,
5558 (errmsg("database system was interrupted while in recovery at log time %s",
5560 timebuf, sizeof(timebuf))),
5561 errhint("If this has occurred more than once some data might be corrupted"
5562 " and you might need to choose an earlier recovery target.")));
5563 break;
5564
5565 case DB_IN_PRODUCTION:
5566 ereport(LOG,
5567 (errmsg("database system was interrupted; last known up at %s",
5569 timebuf, sizeof(timebuf)))));
5570 break;
5571
5572 default:
5573 ereport(FATAL,
5575 errmsg("control file contains invalid database cluster state")));
5576 }
5577
5578 /* This is just to allow attaching to startup process with a debugger */
5579#ifdef XLOG_REPLAY_DELAY
5581 pg_usleep(60000000L);
5582#endif
5583
5584 /*
5585 * Verify that pg_wal, pg_wal/archive_status, and pg_wal/summaries exist.
5586 * In cases where someone has performed a copy for PITR, these directories
5587 * may have been excluded and need to be re-created.
5588 */
5590
5591 /* Set up timeout handler needed to report startup progress. */
5595
5596 /*----------
5597 * If we previously crashed, perform a couple of actions:
5598 *
5599 * - The pg_wal directory may still include some temporary WAL segments
5600 * used when creating a new segment, so perform some clean up to not
5601 * bloat this path. This is done first as there is no point to sync
5602 * this temporary data.
5603 *
5604 * - There might be data which we had written, intending to fsync it, but
5605 * which we had not actually fsync'd yet. Therefore, a power failure in
5606 * the near future might cause earlier unflushed writes to be lost, even
5607 * though more recent data written to disk from here on would be
5608 * persisted. To avoid that, fsync the entire data directory.
5609 */
5612 {
5615 didCrash = true;
5616 }
5617 else
5618 didCrash = false;
5619
5620 /*
5621 * Prepare for WAL recovery if needed.
5622 *
5623 * InitWalRecovery analyzes the control file and the backup label file, if
5624 * any. It updates the in-memory ControlFile buffer according to the
5625 * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
5626 * It also applies the tablespace map file, if any.
5627 */
5628 InitWalRecovery(ControlFile, &wasShutdown,
5629 &haveBackupLabel, &haveTblspcMap);
5630 checkPoint = ControlFile->checkPointCopy;
5631
5632 /* initialize shared memory variables from the checkpoint record */
5633 TransamVariables->nextXid = checkPoint.nextXid;
5634 TransamVariables->nextOid = checkPoint.nextOid;
5636 MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
5637 AdvanceOldestClogXid(checkPoint.oldestXid);
5638 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
5639 SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
5641 checkPoint.newestCommitTsXid);
5642
5643 /*
5644 * Clear out any old relcache cache files. This is *necessary* if we do
5645 * any WAL replay, since that would probably result in the cache files
5646 * being out of sync with database reality. In theory we could leave them
5647 * in place if the database had been cleanly shut down, but it seems
5648 * safest to just remove them always and let them be rebuilt during the
5649 * first backend startup. These files need to be removed from all
5650 * directories including pg_tblspc, however the symlinks are created only
5651 * after reading tablespace_map file in case of archive recovery from
5652 * backup, so we need to clear old relcache files here after creating
5653 * symlinks.
5654 */
5656
5657 /*
5658 * Initialize replication slots, before there's a chance to remove
5659 * required resources.
5660 */
5662
5663 /*
5664 * Startup logical state, needs to be setup now so we have proper data
5665 * during crash recovery.
5666 */
5668
5669 /*
5670 * Startup CLOG. This must be done after TransamVariables->nextXid has
5671 * been initialized and before we accept connections or begin WAL replay.
5672 */
5673 StartupCLOG();
5674
5675 /*
5676 * Startup MultiXact. We need to do this early to be able to replay
5677 * truncations.
5678 */
5680
5681 /*
5682 * Ditto for commit timestamps. Activate the facility if the setting is
5683 * enabled in the control file, as there should be no tracking of commit
5684 * timestamps done when the setting was disabled. This facility can be
5685 * started or stopped when replaying a XLOG_PARAMETER_CHANGE record.
5686 */
5689
5690 /*
5691 * Recover knowledge about replay progress of known replication partners.
5692 */
5694
5695 /*
5696 * Initialize unlogged LSN. On a clean shutdown, it's restored from the
5697 * control file. On recovery, all unlogged relations are blown away, so
5698 * the unlogged LSN counter can be reset too.
5699 */
5703 else
5706
5707 /*
5708 * Copy any missing timeline history files between 'now' and the recovery
5709 * target timeline from archive to pg_wal. While we don't need those files
5710 * ourselves - the history file of the recovery target timeline covers all
5711 * the previous timelines in the history too - a cascading standby server
5712 * might be interested in them. Or, if you archive the WAL from this
5713 * server to a different archive than the primary, it'd be good for all
5714 * the history files to get archived there after failover, so that you can
5715 * use one of the old timelines as a PITR target. Timeline history files
5716 * are small, so it's better to copy them unnecessarily than not copy them
5717 * and regret later.
5718 */
5720
5721 /*
5722 * Before running in recovery, scan pg_twophase and fill in its status to
5723 * be able to work on entries generated by redo. Doing a scan before
5724 * taking any recovery action has the merit to discard any 2PC files that
5725 * are newer than the first record to replay, saving from any conflicts at
5726 * replay. This avoids as well any subsequent scans when doing recovery
5727 * of the on-disk two-phase data.
5728 */
5730
5731 /*
5732 * When starting with crash recovery, reset pgstat data - it might not be
5733 * valid. Otherwise restore pgstat data. It's safe to do this here,
5734 * because postmaster will not yet have started any other processes.
5735 *
5736 * NB: Restoring replication slot stats relies on slot state to have
5737 * already been restored from disk.
5738 *
5739 * TODO: With a bit of extra work we could just start with a pgstat file
5740 * associated with the checkpoint redo location we're starting from.
5741 */
5742 if (didCrash)
5744 else
5746
5748
5751
5752 /* REDO */
5753 if (InRecovery)
5754 {
5755 /* Initialize state for RecoveryInProgress() */
5759 else
5762
5763 /*
5764 * Update pg_control to show that we are recovering and to show the
5765 * selected checkpoint as the place we are starting from. We also mark
5766 * pg_control with any minimum recovery stop point obtained from a
5767 * backup history file.
5768 *
5769 * No need to hold ControlFileLock yet, we aren't up far enough.
5770 */
5772
5773 /*
5774 * If there was a backup label file, it's done its job and the info
5775 * has now been propagated into pg_control. We must get rid of the
5776 * label file so that if we crash during recovery, we'll pick up at
5777 * the latest recovery restartpoint instead of going all the way back
5778 * to the backup start point. It seems prudent though to just rename
5779 * the file out of the way rather than delete it completely.
5780 */
5781 if (haveBackupLabel)
5782 {
5783 unlink(BACKUP_LABEL_OLD);
5785 }
5786
5787 /*
5788 * If there was a tablespace_map file, it's done its job and the
5789 * symlinks have been created. We must get rid of the map file so
5790 * that if we crash during recovery, we don't create symlinks again.
5791 * It seems prudent though to just rename the file out of the way
5792 * rather than delete it completely.
5793 */
5794 if (haveTblspcMap)
5795 {
5796 unlink(TABLESPACE_MAP_OLD);
5798 }
5799
5800 /*
5801 * Initialize our local copy of minRecoveryPoint. When doing crash
5802 * recovery we want to replay up to the end of WAL. Particularly, in
5803 * the case of a promoted standby minRecoveryPoint value in the
5804 * control file is only updated after the first checkpoint. However,
5805 * if the instance crashes before the first post-recovery checkpoint
5806 * is completed then recovery will use a stale location causing the
5807 * startup process to think that there are still invalid page
5808 * references when checking for data consistency.
5809 */
5811 {
5814 }
5815 else
5816 {
5819 }
5820
5821 /* Check that the GUCs used to generate the WAL allow recovery */
5823
5824 /*
5825 * We're in recovery, so unlogged relations may be trashed and must be
5826 * reset. This should be done BEFORE allowing Hot Standby
5827 * connections, so that read-only backends don't try to read whatever
5828 * garbage is left over from before.
5829 */
5831
5832 /*
5833 * Likewise, delete any saved transaction snapshot files that got left
5834 * behind by crashed backends.
5835 */
5837
5838 /*
5839 * Initialize for Hot Standby, if enabled. We won't let backends in
5840 * yet, not until we've reached the min recovery point specified in
5841 * control file and we've established a recovery snapshot from a
5842 * running-xacts WAL record.
5843 */
5845 {
5846 TransactionId *xids;
5847 int nxids;
5848
5850 (errmsg_internal("initializing for hot standby")));
5851
5853
5854 if (wasShutdown)
5855 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
5856 else
5857 oldestActiveXID = checkPoint.oldestActiveXid;
5858 Assert(TransactionIdIsValid(oldestActiveXID));
5859
5860 /* Tell procarray about the range of xids it has to deal with */
5862
5863 /*
5864 * Startup subtrans only. CLOG, MultiXact and commit timestamp
5865 * have already been started up and other SLRUs are not maintained
5866 * during recovery and need not be started yet.
5867 */
5868 StartupSUBTRANS(oldestActiveXID);
5869
5870 /*
5871 * If we're beginning at a shutdown checkpoint, we know that
5872 * nothing was running on the primary at this point. So fake-up an
5873 * empty running-xacts record and use that here and now. Recover
5874 * additional standby state for prepared transactions.
5875 */
5876 if (wasShutdown)
5877 {
5879 TransactionId latestCompletedXid;
5880
5881 /* Update pg_subtrans entries for any prepared transactions */
5883
5884 /*
5885 * Construct a RunningTransactions snapshot representing a
5886 * shut down server, with only prepared transactions still
5887 * alive. We're never overflowed at this point because all
5888 * subxids are listed with their parent prepared transactions.
5889 */
5890 running.xcnt = nxids;
5891 running.subxcnt = 0;
5893 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
5894 running.oldestRunningXid = oldestActiveXID;
5895 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
5896 TransactionIdRetreat(latestCompletedXid);
5897 Assert(TransactionIdIsNormal(latestCompletedXid));
5898 running.latestCompletedXid = latestCompletedXid;
5899 running.xids = xids;
5900
5902 }
5903 }
5904
5905 /*
5906 * We're all set for replaying the WAL now. Do it.
5907 */
5909 performedWalRecovery = true;
5910 }
5911 else
5912 performedWalRecovery = false;
5913
5914 /*
5915 * Finish WAL recovery.
5916 */
5917 endOfRecoveryInfo = FinishWalRecovery();
5918 EndOfLog = endOfRecoveryInfo->endOfLog;
5919 EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
5920 abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
5921 missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
5922
5923 /*
5924 * Reset ps status display, so that no information related to recovery
5925 * shows up.
5926 */
5927 set_ps_display("");
5928
5929 /*
5930 * When recovering from a backup (we are in recovery, and archive recovery
5931 * was requested), complain if we did not roll forward far enough to reach
5932 * the point where the database is consistent. For regular online
5933 * backup-from-primary, that means reaching the end-of-backup WAL record
5934 * (at which point we reset backupStartPoint to be Invalid), for
5935 * backup-from-replica (which can't inject records into the WAL stream),
5936 * that point is when we reach the minRecoveryPoint in pg_control (which
5937 * we purposefully copy last when backing up from a replica). For
5938 * pg_rewind (which creates a backup_label with a method of "pg_rewind")
5939 * or snapshot-style backups (which don't), backupEndRequired will be set
5940 * to false.
5941 *
5942 * Note: it is indeed okay to look at the local variable
5943 * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
5944 * might be further ahead --- ControlFile->minRecoveryPoint cannot have
5945 * been advanced beyond the WAL we processed.
5946 */
5947 if (InRecovery &&
5948 (EndOfLog < LocalMinRecoveryPoint ||
5950 {
5951 /*
5952 * Ran off end of WAL before reaching end-of-backup WAL record, or
5953 * minRecoveryPoint. That's a bad sign, indicating that you tried to
5954 * recover from an online backup but never called pg_backup_stop(), or
5955 * you didn't archive all the WAL needed.
5956 */
5958 {
5960 ereport(FATAL,
5961 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5962 errmsg("WAL ends before end of online backup"),
5963 errhint("All WAL generated while online backup was taken must be available at recovery.")));
5964 else
5965 ereport(FATAL,
5966 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
5967 errmsg("WAL ends before consistent recovery point")));
5968 }
5969 }
5970
5971 /*
5972 * Reset unlogged relations to the contents of their INIT fork. This is
5973 * done AFTER recovery is complete so as to include any unlogged relations
5974 * created during recovery, but BEFORE recovery is marked as having
5975 * completed successfully. Otherwise we'd not retry if any of the post
5976 * end-of-recovery steps fail.
5977 */
5978 if (InRecovery)
5980
5981 /*
5982 * Pre-scan prepared transactions to find out the range of XIDs present.
5983 * This information is not quite needed yet, but it is positioned here so
5984 * that potential problems are detected before any on-disk change is done.
5985 */
5986 oldestActiveXID = PrescanPreparedTransactions(NULL, NULL);
5987
5988 /*
5989 * Allow ordinary WAL segment creation before possibly switching to a new
5990 * timeline, which creates a new segment, and after the last ReadRecord().
5991 */
5993
5994 /*
5995 * Consider whether we need to assign a new timeline ID.
5996 *
5997 * If we did archive recovery, we always assign a new ID. This handles a
5998 * couple of issues. If we stopped short of the end of WAL during
5999 * recovery, then we are clearly generating a new timeline and must assign
6000 * it a unique new ID. Even if we ran to the end, modifying the current
6001 * last segment is problematic because it may result in trying to
6002 * overwrite an already-archived copy of that segment, and we encourage
6003 * DBAs to make their archive_commands reject that. We can dodge the
6004 * problem by making the new active segment have a new timeline ID.
6005 *
6006 * In a normal crash recovery, we can just extend the timeline we were in.
6007 */
6008 newTLI = endOfRecoveryInfo->lastRecTLI;
6010 {
6012 ereport(LOG,
6013 (errmsg("selected new timeline ID: %u", newTLI)));
6014
6015 /*
6016 * Make a writable copy of the last WAL segment. (Note that we also
6017 * have a copy of the last block of the old WAL in
6018 * endOfRecoveryInfo->lastPage; we will use that below.)
6019 */
6020 XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
6021
6022 /*
6023 * Remove the signal files out of the way, so that we don't
6024 * accidentally re-enter archive recovery mode in a subsequent crash.
6025 */
6026 if (endOfRecoveryInfo->standby_signal_file_found)
6028
6029 if (endOfRecoveryInfo->recovery_signal_file_found)
6031
6032 /*
6033 * Write the timeline history file, and have it archived. After this
6034 * point (or rather, as soon as the file is archived), the timeline
6035 * will appear as "taken" in the WAL archive and to any standby
6036 * servers. If we crash before actually switching to the new
6037 * timeline, standby servers will nevertheless think that we switched
6038 * to the new timeline, and will try to connect to the new timeline.
6039 * To minimize the window for that, try to do as little as possible
6040 * between here and writing the end-of-recovery record.
6041 */
6043 EndOfLog, endOfRecoveryInfo->recoveryStopReason);
6044
6045 ereport(LOG,
6046 (errmsg("archive recovery complete")));
6047 }
6048
6049 /* Save the selected TimeLineID in shared memory, too */
6051 XLogCtl->InsertTimeLineID = newTLI;
6052 XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
6054
6055 /*
6056 * Actually, if WAL ended in an incomplete record, skip the parts that
6057 * made it through and start writing after the portion that persisted.
6058 * (It's critical to first write an OVERWRITE_CONTRECORD message, which
6059 * we'll do as soon as we're open for writing new WAL.)
6060 */
6062 {
6063 /*
6064 * We should only have a missingContrecPtr if we're not switching to a
6065 * new timeline. When a timeline switch occurs, WAL is copied from the
6066 * old timeline to the new only up to the end of the last complete
6067 * record, so there can't be an incomplete WAL record that we need to
6068 * disregard.
6069 */
6070 Assert(newTLI == endOfRecoveryInfo->lastRecTLI);
6072 EndOfLog = missingContrecPtr;
6073 }
6074
6075 /*
6076 * Prepare to write WAL starting at EndOfLog location, and init xlog
6077 * buffer cache using the block containing the last record from the
6078 * previous incarnation.
6079 */
6080 Insert = &XLogCtl->Insert;
6081 Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
6082 Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
6083
6084 /*
6085 * Tricky point here: lastPage contains the *last* block that the LastRec
6086 * record spans, not the one it starts in. The last block is indeed the
6087 * one we want to use.
6088 */
6089 if (EndOfLog % XLOG_BLCKSZ != 0)
6090 {
6091 char *page;
6092 int len;
6093 int firstIdx;
6094
6095 firstIdx = XLogRecPtrToBufIdx(EndOfLog);
6096 len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
6097 Assert(len < XLOG_BLCKSZ);
6098
6099 /* Copy the valid part of the last block, and zero the rest */
6100 page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
6101 memcpy(page, endOfRecoveryInfo->lastPage, len);
6102 memset(page + len, 0, XLOG_BLCKSZ - len);
6103
6104 pg_atomic_write_u64(&XLogCtl->xlblocks[firstIdx], endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ);
6105 XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
6106 }
6107 else
6108 {
6109 /*
6110 * There is no partial block to copy. Just set InitializedUpTo, and
6111 * let the first attempt to insert a log record to initialize the next
6112 * buffer.
6113 */
6114 XLogCtl->InitializedUpTo = EndOfLog;
6115 }
6116
6117 /*
6118 * Update local and shared status. This is OK to do without any locks
6119 * because no other process can be reading or writing WAL yet.
6120 */
6121 LogwrtResult.Write = LogwrtResult.Flush = EndOfLog;
6125 XLogCtl->LogwrtRqst.Write = EndOfLog;
6126 XLogCtl->LogwrtRqst.Flush = EndOfLog;
6127
6128 /*
6129 * Preallocate additional log files, if wanted.
6130 */
6131 PreallocXlogFiles(EndOfLog, newTLI);
6132
6133 /*
6134 * Okay, we're officially UP.
6135 */
6136 InRecovery = false;
6137
6138 /* start the archive_timeout timer and LSN running */
6139 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
6140 XLogCtl->lastSegSwitchLSN = EndOfLog;
6141
6142 /* also initialize latestCompletedXid, to nextXid - 1 */
6143 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
6146 LWLockRelease(ProcArrayLock);
6147
6148 /*
6149 * Start up subtrans, if not already done for hot standby. (commit
6150 * timestamps are started below, if necessary.)
6151 */
6153 StartupSUBTRANS(oldestActiveXID);
6154
6155 /*
6156 * Perform end of recovery actions for any SLRUs that need it.
6157 */
6158 TrimCLOG();
6159 TrimMultiXact();
6160
6161 /*
6162 * Reload shared-memory state for prepared transactions. This needs to
6163 * happen before renaming the last partial segment of the old timeline as
6164 * it may be possible that we have to recover some transactions from it.
6165 */
6167
6168 /* Shut down xlogreader */
6170
6171 /* Enable WAL writes for this backend only. */
6173
6174 /* If necessary, write overwrite-contrecord before doing anything else */
6176 {
6179 }
6180
6181 /*
6182 * Update full_page_writes in shared memory and write an XLOG_FPW_CHANGE
6183 * record before resource manager writes cleanup WAL records or checkpoint
6184 * record is written.
6185 */
6186 Insert->fullPageWrites = lastFullPageWrites;
6188
6189 /*
6190 * Emit checkpoint or end-of-recovery record in XLOG, if required.
6191 */
6192 if (performedWalRecovery)
6193 promoted = PerformRecoveryXLogAction();
6194
6195 /*
6196 * If any of the critical GUCs have changed, log them before we allow
6197 * backends to write WAL.
6198 */
6200
6201 /* If this is archive recovery, perform post-recovery cleanup actions. */
6203 CleanupAfterArchiveRecovery(EndOfLogTLI, EndOfLog, newTLI);
6204
6205 /*
6206 * Local WAL inserts enabled, so it's time to finish initialization of
6207 * commit timestamp.
6208 */
6210
6211 /* Clean up EndOfWalRecoveryInfo data to appease Valgrind leak checking */
6212 if (endOfRecoveryInfo->lastPage)
6213 pfree(endOfRecoveryInfo->lastPage);
6214 pfree(endOfRecoveryInfo->recoveryStopReason);
6215 pfree(endOfRecoveryInfo);
6216
6217 /*
6218 * All done with end-of-recovery actions.
6219 *
6220 * Now allow backends to write WAL and update the control file status in
6221 * consequence. SharedRecoveryState, that controls if backends can write
6222 * WAL, is updated while holding ControlFileLock to prevent other backends
6223 * from looking at an inconsistent state of the control file in shared
6224 * memory. There is still a small window during which backends can write
6225 * WAL and the control file is still referring to a system not in
6226 * DB_IN_PRODUCTION state while looking at the on-disk control file.
6227 *
6228 * Also, we use info_lck to update SharedRecoveryState to ensure that
6229 * there are no race conditions concerning visibility of other recent
6230 * updates to shared memory.
6231 */
6232 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6234
6238
6240 LWLockRelease(ControlFileLock);
6241
6242 /*
6243 * Wake up all waiters for replay LSN. They need to report an error that
6244 * recovery was ended before reaching the target LSN.
6245 */
6247
6248 /*
6249 * Shutdown the recovery environment. This must occur after
6250 * RecoverPreparedTransactions() (see notes in lock_twophase_recover())
6251 * and after switching SharedRecoveryState to RECOVERY_STATE_DONE so that
6252 * any session building a snapshot will not rely on KnownAssignedXids as
6253 * RecoveryInProgress() would return false at this stage. This is
6254 * particularly critical for prepared 2PC transactions, that would still
6255 * need to be included in snapshots once recovery has ended.
6256 */
6259
6260 /*
6261 * If there were cascading standby servers connected to us, nudge any wal
6262 * sender processes to notice that we've been promoted.
6263 */
6264 WalSndWakeup(true, true);
6265
6266 /*
6267 * If this was a promotion, request an (online) checkpoint now. This isn't
6268 * required for consistency, but the last restartpoint might be far back,
6269 * and in case of a crash, recovering from it might take longer than is
6270 * appropriate now that we're not in standby mode anymore.
6271 */
6272 if (promoted)
6274}
static void pg_atomic_write_membarrier_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:492
TimeLineID findNewestTimeLine(TimeLineID startTLI)
Definition: timeline.c:264
void restoreTimeLineHistoryFiles(TimeLineID begin, TimeLineID end)
Definition: timeline.c:50
void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, XLogRecPtr switchpoint, char *reason)
Definition: timeline.c:304
void startup_progress_timeout_handler(void)
Definition: startup.c:303
uint32 TransactionId
Definition: c.h:662
void StartupCLOG(void)
Definition: clog.c:843
void TrimCLOG(void)
Definition: clog.c:858
void StartupCommitTs(void)
Definition: commit_ts.c:608
void CompleteCommitTsInitialization(void)
Definition: commit_ts.c:618
void SyncDataDirectory(void)
Definition: fd.c:3606
#define IsBootstrapProcessingMode()
Definition: miscadmin.h:477
void TrimMultiXact(void)
Definition: multixact.c:2108
void StartupMultiXact(void)
Definition: multixact.c:2083
void StartupReplicationOrigin(void)
Definition: origin.c:722
@ DB_IN_PRODUCTION
Definition: pg_control.h:97
@ DB_IN_CRASH_RECOVERY
Definition: pg_control.h:95
const void size_t len
void pgstat_restore_stats(void)
Definition: pgstat.c:507
void pgstat_discard_stats(void)
Definition: pgstat.c:519
void ProcArrayApplyRecoveryInfo(RunningTransactions running)
Definition: procarray.c:1054
void ProcArrayInitRecovery(TransactionId initializedUptoXID)
Definition: procarray.c:1023
static void set_ps_display(const char *activity)
Definition: ps_status.h:40
void ResetUnloggedRelations(int op)
Definition: reinit.c:47
#define UNLOGGED_RELATION_INIT
Definition: reinit.h:28
#define UNLOGGED_RELATION_CLEANUP
Definition: reinit.h:27
void RelationCacheInitFileRemove(void)
Definition: relcache.c:6900
void StartupReorderBuffer(void)
void StartupReplicationSlots(void)
Definition: slot.c:2197
void DeleteAllExportedSnapshotFiles(void)
Definition: snapmgr.c:1585
void InitRecoveryTransactionEnvironment(void)
Definition: standby.c:95
void ShutdownRecoveryTransactionEnvironment(void)
Definition: standby.c:161
@ SUBXIDS_IN_SUBTRANS
Definition: standby.h:82
XLogRecPtr lastPageBeginPtr
Definition: xlogrecovery.h:121
XLogRecPtr abortedRecPtr
Definition: xlogrecovery.h:130
XLogRecPtr missingContrecPtr
Definition: xlogrecovery.h:131
TimeLineID endOfLogTLI
Definition: xlogrecovery.h:119
TransactionId oldestRunningXid
Definition: standby.h:92
TransactionId nextXid
Definition: standby.h:91
TransactionId latestCompletedXid
Definition: standby.h:95
subxids_array_status subxid_status
Definition: standby.h:90
TransactionId * xids
Definition: standby.h:97
FullTransactionId latestCompletedXid
Definition: transam.h:238
pg_atomic_uint64 logInsertResult
Definition: xlog.c:472
void StartupSUBTRANS(TransactionId oldestActiveXID)
Definition: subtrans.c:283
TimeoutId RegisterTimeout(TimeoutId id, timeout_handler_proc handler)
Definition: timeout.c:505
@ STARTUP_PROGRESS_TIMEOUT
Definition: timeout.h:38
#define TransactionIdRetreat(dest)
Definition: transam.h:141
static void FullTransactionIdRetreat(FullTransactionId *dest)
Definition: transam.h:103
#define XidFromFullTransactionId(x)
Definition: transam.h:48
#define TransactionIdIsValid(xid)
Definition: transam.h:41
#define TransactionIdIsNormal(xid)
Definition: transam.h:42
void RecoverPreparedTransactions(void)
Definition: twophase.c:2083
void restoreTwoPhaseData(void)
Definition: twophase.c:1904
TransactionId PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
Definition: twophase.c:1966
void StandbyRecoverPreparedTransactions(void)
Definition: twophase.c:2045
void WalSndWakeup(bool physical, bool logical)
Definition: walsender.c:3793
void UpdateFullPageWrites(void)
Definition: xlog.c:8236
static void ValidateXLOGDirectoryStructure(void)
Definition: xlog.c:4118
static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, TimeLineID newTLI)
Definition: xlog.c:7509
static void XLogReportParameters(void)
Definition: xlog.c:8173
static bool PerformRecoveryXLogAction(void)
Definition: xlog.c:6356
static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog, TimeLineID newTLI)
Definition: xlog.c:5343
static bool lastFullPageWrites
Definition: xlog.c:219
static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog, TimeLineID newTLI)
Definition: xlog.c:5268
static void CheckRequiredParameterValues(void)
Definition: xlog.c:5439
static void RemoveTempXlogFiles(void)
Definition: xlog.c:3851
static char * str_time(pg_time_t tnow, char *buf, size_t bufsize)
Definition: xlog.c:5255
#define TABLESPACE_MAP_OLD
Definition: xlog.h:309
#define TABLESPACE_MAP
Definition: xlog.h:308
#define STANDBY_SIGNAL_FILE
Definition: xlog.h:304
#define BACKUP_LABEL_OLD
Definition: xlog.h:306
#define BACKUP_LABEL_FILE
Definition: xlog.h:305
#define RECOVERY_SIGNAL_FILE
Definition: xlog.h:303
@ RECOVERY_STATE_CRASH
Definition: xlog.h:91
@ RECOVERY_STATE_ARCHIVE
Definition: xlog.h:92
#define XRecOffIsValid(xlrp)
void ShutdownWalRecovery(void)
bool InArchiveRecovery
Definition: xlogrecovery.c:141
void PerformWalRecovery(void)
static XLogRecPtr missingContrecPtr
Definition: xlogrecovery.c:381
static XLogRecPtr abortedRecPtr
Definition: xlogrecovery.c:380
EndOfWalRecoveryInfo * FinishWalRecovery(void)
void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
Definition: xlogrecovery.c:520
TimeLineID recoveryTargetTLI
Definition: xlogrecovery.c:125
HotStandbyState standbyState
Definition: xlogutils.c:53
bool InRecovery
Definition: xlogutils.c:50
@ STANDBY_DISABLED
Definition: xlogutils.h:52
void WaitLSNWakeup(WaitLSNType lsnType, XLogRecPtr currentLSN)
Definition: xlogwait.c:269
@ WAIT_LSN_TYPE_REPLAY
Definition: xlogwait.h:38

References abortedRecPtr, EndOfWalRecoveryInfo::abortedRecPtr, AdvanceOldestClogXid(), ArchiveRecoveryRequested, Assert(), AuxProcessResourceOwner, BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, ControlFileData::backupEndRequired, ControlFileData::backupStartPoint, ControlFileData::checkPoint, CHECKPOINT_FORCE, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CleanupAfterArchiveRecovery(), CompleteCommitTsInitialization(), ControlFile, CreateOverwriteContrecordRecord(), CurrentResourceOwner, DB_IN_ARCHIVE_RECOVERY, DB_IN_CRASH_RECOVERY, DB_IN_PRODUCTION, DB_SHUTDOWNED, DB_SHUTDOWNED_IN_RECOVERY, DB_SHUTDOWNING, DEBUG1, DeleteAllExportedSnapshotFiles(), doPageWrites, durable_rename(), durable_unlink(), EnableHotStandby, EndOfWalRecoveryInfo::endOfLog, EndOfWalRecoveryInfo::endOfLogTLI, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errhint(), errmsg(), errmsg_internal(), FATAL, findNewestTimeLine(), FinishWalRecovery(), FirstNormalUnloggedLSN, XLogwrtRqst::Flush, XLogwrtResult::Flush, CheckPoint::fullPageWrites, FullTransactionIdRetreat(), InArchiveRecovery, XLogCtlData::info_lck, XLogCtlData::InitializedUpTo, InitRecoveryTransactionEnvironment(), InitWalRecovery(), InRecovery, XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, IsBootstrapProcessingMode, IsPostmasterEnvironment, lastFullPageWrites, EndOfWalRecoveryInfo::lastPage, EndOfWalRecoveryInfo::lastPageBeginPtr, EndOfWalRecoveryInfo::lastRec, EndOfWalRecoveryInfo::lastRecTLI, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, TransamVariablesData::latestCompletedXid, RunningTransactionsData::latestCompletedXid, len, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LocalSetXLogInsertAllowed(), LOG, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, missingContrecPtr, 
EndOfWalRecoveryInfo::missingContrecPtr, MultiXactSetNextMXact(), CheckPoint::newestCommitTsXid, CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, NOTICE, TransamVariablesData::oidCount, CheckPoint::oldestActiveXid, CheckPoint::oldestCommitTsXid, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, XLogCtlData::pages, PerformRecoveryXLogAction(), PerformWalRecovery(), pfree(), pg_atomic_write_membarrier_u64(), pg_atomic_write_u64(), pg_usleep(), pgstat_discard_stats(), pgstat_restore_stats(), PreallocXlogFiles(), PrescanPreparedTransactions(), XLogCtlData::PrevTimeLineID, ProcArrayApplyRecoveryInfo(), ProcArrayInitRecovery(), RecoverPreparedTransactions(), RECOVERY_SIGNAL_FILE, EndOfWalRecoveryInfo::recovery_signal_file_found, RECOVERY_STATE_ARCHIVE, RECOVERY_STATE_CRASH, RECOVERY_STATE_DONE, EndOfWalRecoveryInfo::recoveryStopReason, recoveryTargetTLI, CheckPoint::redo, RedoRecPtr, XLogCtlInsert::RedoRecPtr, XLogCtlData::RedoRecPtr, RegisterTimeout(), RelationCacheInitFileRemove(), RemoveTempXlogFiles(), RequestCheckpoint(), ResetUnloggedRelations(), restoreTimeLineHistoryFiles(), restoreTwoPhaseData(), set_ps_display(), SetCommitTsLimit(), SetInstallXLogFileSegmentActive(), SetMultiXactIdLimit(), SetTransactionIdLimit(), XLogCtlData::SharedRecoveryState, ShutdownRecoveryTransactionEnvironment(), ShutdownWalRecovery(), SpinLockAcquire, SpinLockRelease, STANDBY_DISABLED, STANDBY_SIGNAL_FILE, EndOfWalRecoveryInfo::standby_signal_file_found, StandbyRecoverPreparedTransactions(), standbyState, STARTUP_PROGRESS_TIMEOUT, startup_progress_timeout_handler(), StartupCLOG(), StartupCommitTs(), StartupMultiXact(), StartupReorderBuffer(), StartupReplicationOrigin(), StartupReplicationSlots(), StartupSUBTRANS(), ControlFileData::state, str_time(), 
RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, SUBXIDS_IN_SUBTRANS, SyncDataDirectory(), TABLESPACE_MAP, TABLESPACE_MAP_OLD, CheckPoint::ThisTimeLineID, CheckPoint::time, ControlFileData::time, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdIsValid, TransactionIdRetreat, TransamVariables, TrimCLOG(), TrimMultiXact(), UNLOGGED_RELATION_CLEANUP, UNLOGGED_RELATION_INIT, XLogCtlData::unloggedLSN, ControlFileData::unloggedLSN, UpdateControlFile(), UpdateFullPageWrites(), ValidateXLOGDirectoryStructure(), WAIT_LSN_TYPE_REPLAY, WaitLSNWakeup(), WalSndWakeup(), XLogwrtRqst::Write, XLogwrtResult::Write, writeTimeLineHistory(), RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLogCtlData::xlblocks, XLogCtl, XLogInitNewTimeline(), XLogRecPtrIsValid, XLogRecPtrToBufIdx, XLogRecPtrToBytePos(), XLogReportParameters(), and XRecOffIsValid.

Referenced by InitPostgres(), and StartupProcessMain().

◆ str_time()

static char * str_time ( pg_time_t  tnow,
char *  buf,
size_t  bufsize 
)
static

Definition at line 5255 of file xlog.c.

5256{
5258 "%Y-%m-%d %H:%M:%S %Z",
5259 pg_localtime(&tnow, log_timezone));
5260
5261 return buf;
5262}
#define bufsize
Definition: indent_globs.h:36
static char * buf
Definition: pg_test_fsync.c:72
size_t pg_strftime(char *s, size_t maxsize, const char *format, const struct pg_tm *t)
Definition: strftime.c:128
struct pg_tm * pg_localtime(const pg_time_t *timep, const pg_tz *tz)
Definition: localtime.c:1345
PGDLLIMPORT pg_tz * log_timezone
Definition: pgtz.c:31

References buf, bufsize, log_timezone, pg_localtime(), and pg_strftime().

Referenced by StartupXLOG().

◆ SwitchIntoArchiveRecovery()

void SwitchIntoArchiveRecovery ( XLogRecPtr  EndRecPtr,
TimeLineID  replayTLI 
)

Definition at line 6281 of file xlog.c.

6282{
6283 /* initialize minRecoveryPoint to this record */
6284 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
6286 if (ControlFile->minRecoveryPoint < EndRecPtr)
6287 {
6288 ControlFile->minRecoveryPoint = EndRecPtr;
6289 ControlFile->minRecoveryPointTLI = replayTLI;
6290 }
6291 /* update local copy */
6294
6295 /*
6296 * The startup process can update its local copy of minRecoveryPoint from
6297 * this point.
6298 */
6300
6302
6303 /*
6304 * We update SharedRecoveryState while holding the lock on ControlFileLock
6305 * so both states are consistent in shared memory.
6306 */
6310
6311 LWLockRelease(ControlFileLock);
6312}
static bool updateMinRecoveryPoint
Definition: xlog.c:649

References ControlFile, DB_IN_ARCHIVE_RECOVERY, XLogCtlData::info_lck, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RECOVERY_STATE_ARCHIVE, XLogCtlData::SharedRecoveryState, SpinLockAcquire, SpinLockRelease, ControlFileData::state, UpdateControlFile(), updateMinRecoveryPoint, and XLogCtl.

Referenced by ReadRecord().

◆ update_checkpoint_display()

static void update_checkpoint_display ( int  flags,
bool  restartpoint,
bool  reset 
)
static

Definition at line 6896 of file xlog.c.

6897{
6898 /*
6899 * The status is reported only for end-of-recovery and shutdown
6900 * checkpoints or shutdown restartpoints. Updating the ps display is
6901 * useful in those situations as it may not be possible to rely on
6902 * pg_stat_activity to see the status of the checkpointer or the startup
6903 * process.
6904 */
6906 return;
6907
6908 if (reset)
6909 set_ps_display("");
6910 else
6911 {
6912 char activitymsg[128];
6913
6914 snprintf(activitymsg, sizeof(activitymsg), "performing %s%s%s",
6915 (flags & CHECKPOINT_END_OF_RECOVERY) ? "end-of-recovery " : "",
6916 (flags & CHECKPOINT_IS_SHUTDOWN) ? "shutdown " : "",
6917 restartpoint ? "restartpoint" : "checkpoint");
6918 set_ps_display(activitymsg);
6919 }
6920}

References CHECKPOINT_END_OF_RECOVERY, CHECKPOINT_IS_SHUTDOWN, reset(), set_ps_display(), and snprintf.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateCheckPointDistanceEstimate()

static void UpdateCheckPointDistanceEstimate ( uint64  nbytes)
static

Definition at line 6858 of file xlog.c.

6859{
6860 /*
6861 * To estimate the number of segments consumed between checkpoints, keep a
6862 * moving average of the amount of WAL generated in previous checkpoint
6863 * cycles. However, if the load is bursty, with quiet periods and busy
6864 * periods, we want to cater for the peak load. So instead of a plain
6865 * moving average, let the average decline slowly if the previous cycle
6866 * used less WAL than estimated, but bump it up immediately if it used
6867 * more.
6868 *
6869 * When checkpoints are triggered by max_wal_size, this should converge to
6870 * CheckpointSegments * wal_segment_size.
6871 *
6872 * Note: This doesn't pay any attention to what caused the checkpoint.
6873 * Checkpoints triggered manually with CHECKPOINT command, or by e.g.
6874 * starting a base backup, are counted the same as those created
6875 * automatically. The slow-decline will largely mask them out, if they are
6876 * not frequent. If they are frequent, it seems reasonable to count them
6877 * in as any others; if you issue a manual checkpoint every 5 minutes and
6878 * never let a timed checkpoint happen, it makes sense to base the
6879 * preallocation on that 5 minute interval rather than whatever
6880 * checkpoint_timeout is set to.
6881 */
6882 PrevCheckPointDistance = nbytes;
6883 if (CheckPointDistanceEstimate < nbytes)
6885 else
6887 (0.90 * CheckPointDistanceEstimate + 0.10 * (double) nbytes);
6888}

References CheckPointDistanceEstimate, and PrevCheckPointDistance.

Referenced by CreateCheckPoint(), and CreateRestartPoint().

◆ UpdateControlFile()

static void UpdateControlFile ( void  )
static

Definition at line 4600 of file xlog.c.

4601{
4603}
void update_controlfile(const char *DataDir, ControlFileData *ControlFile, bool do_sync)

References ControlFile, DataDir, and update_controlfile().

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), ReachedEndOfBackup(), StartupXLOG(), SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), xlog_redo(), and XLogReportParameters().

◆ UpdateFullPageWrites()

void UpdateFullPageWrites ( void  )

Definition at line 8236 of file xlog.c.

8237{
8239 bool recoveryInProgress;
8240
8241 /*
8242 * Do nothing if full_page_writes has not been changed.
8243 *
8244 * It's safe to check the shared full_page_writes without the lock,
8245 * because we assume that there is no concurrently running process which
8246 * can update it.
8247 */
8248 if (fullPageWrites == Insert->fullPageWrites)
8249 return;
8250
8251 /*
8252 * Perform this outside critical section so that the WAL insert
8253 * initialization done by RecoveryInProgress() doesn't trigger an
8254 * assertion failure.
8255 */
8256 recoveryInProgress = RecoveryInProgress();
8257
8259
8260 /*
8261 * It's always safe to take full page images, even when not strictly
8262 * required, but not the other way round. So if we're setting full_page_writes
8263 * to true, first set it true and then write the WAL record. If we're
8264 * setting it to false, first write the WAL record and then set the global
8265 * flag.
8266 */
8267 if (fullPageWrites)
8268 {
8270 Insert->fullPageWrites = true;
8272 }
8273
8274 /*
8275 * Write an XLOG_FPW_CHANGE record. This allows us to keep track of
8276 * full_page_writes during archive recovery, if required.
8277 */
8278 if (XLogStandbyInfoActive() && !recoveryInProgress)
8279 {
8281 XLogRegisterData(&fullPageWrites, sizeof(bool));
8282
8283 XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
8284 }
8285
8286 if (!fullPageWrites)
8287 {
8289 Insert->fullPageWrites = false;
8291 }
8293}
#define XLOG_FPW_CHANGE
Definition: pg_control.h:76

References END_CRIT_SECTION, fullPageWrites, XLogCtlData::Insert, Insert(), RecoveryInProgress(), START_CRIT_SECTION, WALInsertLockAcquireExclusive(), WALInsertLockRelease(), XLOG_FPW_CHANGE, XLogBeginInsert(), XLogCtl, XLogInsert(), XLogRegisterData(), and XLogStandbyInfoActive.

Referenced by StartupXLOG(), and UpdateSharedMemoryConfig().

◆ UpdateLastRemovedPtr()

static void UpdateLastRemovedPtr ( char *  filename)
static

Definition at line 3831 of file xlog.c.

3832{
3833 uint32 tli;
3834 XLogSegNo segno;
3835
3837
3839 if (segno > XLogCtl->lastRemovedSegNo)
3840 XLogCtl->lastRemovedSegNo = segno;
3842}
static void XLogFromFileName(const char *fname, TimeLineID *tli, XLogSegNo *logSegNo, int wal_segsz_bytes)

References filename, XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, wal_segment_size, XLogCtl, and XLogFromFileName().

Referenced by RemoveOldXlogFiles().

◆ UpdateMinRecoveryPoint()

static void UpdateMinRecoveryPoint ( XLogRecPtr  lsn,
bool  force 
)
static

Definition at line 2703 of file xlog.c.

2704{
2705 /* Quick check using our local copy of the variable */
2706 if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
2707 return;
2708
2709 /*
2710 * An invalid minRecoveryPoint means that we need to recover all the WAL,
2711 * i.e., we're doing crash recovery. We never modify the control file's
2712 * value in that case, so we can short-circuit future checks here too. The
2713 * local values of minRecoveryPoint and minRecoveryPointTLI should not be
2714 * updated until crash recovery finishes. We only do this for the startup
2715 * process as it should not update its own reference of minRecoveryPoint
2716 * until it has finished crash recovery to make sure that all WAL
2717 * available is replayed in this case. This also avoids extra locks being
2718 * taken on the control file by the startup process.
2719 */
2721 {
2722 updateMinRecoveryPoint = false;
2723 return;
2724 }
2725
2726 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
2727
2728 /* update local copy */
2731
2733 updateMinRecoveryPoint = false;
2734 else if (force || LocalMinRecoveryPoint < lsn)
2735 {
2736 XLogRecPtr newMinRecoveryPoint;
2737 TimeLineID newMinRecoveryPointTLI;
2738
2739 /*
2740 * To avoid having to update the control file too often, we update it
2741 * all the way to the last record being replayed, even though 'lsn'
2742 * would suffice for correctness. This also allows the 'force' case
2743 * to not need a valid 'lsn' value.
2744 *
2745 * Another important reason for doing it this way is that the passed
2746 * 'lsn' value could be bogus, i.e., past the end of available WAL, if
2747 * the caller got it from a corrupted heap page. Accepting such a
2748 * value as the min recovery point would prevent us from coming up at
2749 * all. Instead, we just log a warning and continue with recovery.
2750 * (See also the comments about corrupt LSNs in XLogFlush.)
2751 */
2752 newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
2753 if (!force && newMinRecoveryPoint < lsn)
2754 elog(WARNING,
2755 "xlog min recovery request %X/%08X is past current point %X/%08X",
2756 LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
2757
2758 /* update control file */
2759 if (ControlFile->minRecoveryPoint < newMinRecoveryPoint)
2760 {
2761 ControlFile->minRecoveryPoint = newMinRecoveryPoint;
2762 ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
2764 LocalMinRecoveryPoint = newMinRecoveryPoint;
2765 LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
2766
2768 errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
2769 LSN_FORMAT_ARGS(newMinRecoveryPoint),
2770 newMinRecoveryPointTLI));
2771 }
2772 }
2773 LWLockRelease(ControlFileLock);
2774}
XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)

References ControlFile, DEBUG2, elog, ereport, errmsg_internal(), GetCurrentReplayRecPtr(), InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, UpdateControlFile(), updateMinRecoveryPoint, WARNING, and XLogRecPtrIsValid.

Referenced by CreateRestartPoint(), XLogFlush(), and XLogInitNewTimeline().

◆ ValidateXLOGDirectoryStructure()

static void ValidateXLOGDirectoryStructure ( void  )
static

Definition at line 4118 of file xlog.c.

4119{
4120 char path[MAXPGPATH];
4121 struct stat stat_buf;
4122
4123 /* Check for pg_wal; if it doesn't exist, error out */
4124 if (stat(XLOGDIR, &stat_buf) != 0 ||
4125 !S_ISDIR(stat_buf.st_mode))
4126 ereport(FATAL,
4128 errmsg("required WAL directory \"%s\" does not exist",
4129 XLOGDIR)));
4130
4131 /* Check for archive_status */
4132 snprintf(path, MAXPGPATH, XLOGDIR "/archive_status");
4133 if (stat(path, &stat_buf) == 0)
4134 {
4135 /* Check for weird cases where it exists but isn't a directory */
4136 if (!S_ISDIR(stat_buf.st_mode))
4137 ereport(FATAL,
4139 errmsg("required WAL directory \"%s\" does not exist",
4140 path)));
4141 }
4142 else
4143 {
4144 ereport(LOG,
4145 (errmsg("creating missing WAL directory \"%s\"", path)));
4146 if (MakePGDirectory(path) < 0)
4147 ereport(FATAL,
4149 errmsg("could not create missing directory \"%s\": %m",
4150 path)));
4151 }
4152
4153 /* Check for summaries */
4154 snprintf(path, MAXPGPATH, XLOGDIR "/summaries");
4155 if (stat(path, &stat_buf) == 0)
4156 {
4157 /* Check for weird cases where it exists but isn't a directory */
4158 if (!S_ISDIR(stat_buf.st_mode))
4159 ereport(FATAL,
4160 (errmsg("required WAL directory \"%s\" does not exist",
4161 path)));
4162 }
4163 else
4164 {
4165 ereport(LOG,
4166 (errmsg("creating missing WAL directory \"%s\"", path)));
4167 if (MakePGDirectory(path) < 0)
4168 ereport(FATAL,
4169 (errmsg("could not create missing directory \"%s\": %m",
4170 path)));
4171 }
4172}
int MakePGDirectory(const char *directoryName)
Definition: fd.c:3975
#define S_ISDIR(m)
Definition: win32_port.h:315

References ereport, errcode_for_file_access(), errmsg(), FATAL, LOG, MakePGDirectory(), MAXPGPATH, S_ISDIR, snprintf, stat::st_mode, stat, and XLOGDIR.

Referenced by StartupXLOG().

◆ WaitXLogInsertionsToFinish()

static XLogRecPtr WaitXLogInsertionsToFinish ( XLogRecPtr  upto)
static

Definition at line 1510 of file xlog.c.

1511{
1512 uint64 bytepos;
1513 XLogRecPtr inserted;
1514 XLogRecPtr reservedUpto;
1515 XLogRecPtr finishedUpto;
1517 int i;
1518
1519 if (MyProc == NULL)
1520 elog(PANIC, "cannot wait without a PGPROC structure");
1521
1522 /*
1523 * Check if there's any work to do. Use a barrier to ensure we get the
1524 * freshest value.
1525 */
1527 if (upto <= inserted)
1528 return inserted;
1529
1530 /* Read the current insert position */
1531 SpinLockAcquire(&Insert->insertpos_lck);
1532 bytepos = Insert->CurrBytePos;
1533 SpinLockRelease(&Insert->insertpos_lck);
1534 reservedUpto = XLogBytePosToEndRecPtr(bytepos);
1535
1536 /*
1537 * No-one should request to flush a piece of WAL that hasn't even been
1538 * reserved yet. However, it can happen if there is a block with a bogus
1539 * LSN on disk, for example. XLogFlush checks for that situation and
1540 * complains, but only after the flush. Here we just assume that to mean
1541 * that all WAL that has been reserved needs to be finished. In this
1542 * corner-case, the return value can be smaller than 'upto' argument.
1543 */
1544 if (upto > reservedUpto)
1545 {
1546 ereport(LOG,
1547 errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
1548 LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto)));
1549 upto = reservedUpto;
1550 }
1551
1552 /*
1553 * Loop through all the locks, sleeping on any in-progress insert older
1554 * than 'upto'.
1555 *
1556 * finishedUpto is our return value, indicating the point up to which all
1557 * the WAL insertions have been finished. Initialize it to the head of
1558 * reserved WAL, and as we iterate through the insertion locks, back it
1559 * out for any insertion that's still in progress.
1560 */
1561 finishedUpto = reservedUpto;
1562 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1563 {
1564 XLogRecPtr insertingat = InvalidXLogRecPtr;
1565
1566 do
1567 {
1568 /*
1569 * See if this insertion is in progress. LWLockWaitForVar will
1570 * wait for the lock to be released, or for the 'value' to be set
1571 * by a LWLockUpdateVar call. When a lock is initially acquired,
1572 * its value is 0 (InvalidXLogRecPtr), which means that we don't
1573 * know where it's inserting yet. We will have to wait for it. If
1574 * it's a small insertion, the record will most likely fit on the
1575 * same page and the inserter will release the lock without ever
1576 * calling LWLockUpdateVar. But if it has to sleep, it will
1577 * advertise the insertion point with LWLockUpdateVar before
1578 * sleeping.
1579 *
1580 * In this loop we are only waiting for insertions that started
1581 * before WaitXLogInsertionsToFinish was called. The lack of
1582 * memory barriers in the loop means that we might see locks as
1583 * "unused" that have since become used. This is fine because
1584 * they only can be used for later insertions that we would not
1585 * want to wait on anyway. Not taking a lock to acquire the
1586 * current insertingAt value means that we might see older
1587 * insertingAt values. This is also fine, because if we read a
1588 * value too old, we will add ourselves to the wait queue, which
1589 * contains atomic operations.
1590 */
1591 if (LWLockWaitForVar(&WALInsertLocks[i].l.lock,
1593 insertingat, &insertingat))
1594 {
1595 /* the lock was free, so no insertion in progress */
1596 insertingat = InvalidXLogRecPtr;
1597 break;
1598 }
1599
1600 /*
1601 * This insertion is still in progress. Have to wait, unless the
1602 * inserter has proceeded past 'upto'.
1603 */
1604 } while (insertingat < upto);
1605
1606 if (XLogRecPtrIsValid(insertingat) && insertingat < finishedUpto)
1607 finishedUpto = insertingat;
1608 }
1609
1610 /*
1611 * Advance the limit we know to have been inserted and return the freshest
1612 * value we know of, which might be beyond what we requested if somebody
1613 * is concurrently doing this with an 'upto' pointer ahead of us.
1614 */
1616 finishedUpto);
1617
1618 return finishedUpto;
1619}
static uint64 pg_atomic_monotonic_advance_u64(volatile pg_atomic_uint64 *ptr, uint64 target)
Definition: atomics.h:583
bool LWLockWaitForVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 oldval, uint64 *newval)
Definition: lwlock.c:1590
PGPROC * MyProc
Definition: proc.c:67
pg_atomic_uint64 insertingAt
Definition: xlog.c:372

References elog, ereport, errmsg(), i, XLogCtlData::Insert, Insert(), WALInsertLock::insertingAt, InvalidXLogRecPtr, WALInsertLockPadded::l, LOG, XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, LWLockWaitForVar(), MyProc, NUM_XLOGINSERT_LOCKS, PANIC, pg_atomic_monotonic_advance_u64(), pg_atomic_read_membarrier_u64(), SpinLockAcquire, SpinLockRelease, WALInsertLocks, XLogBytePosToEndRecPtr(), XLogCtl, and XLogRecPtrIsValid.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

◆ WALInsertLockAcquire()

static void WALInsertLockAcquire ( void  )
static

Definition at line 1377 of file xlog.c.

1378{
1379 bool immed;
1380
1381 /*
1382 * It doesn't matter which of the WAL insertion locks we acquire, so try
1383 * the one we used last time. If the system isn't particularly busy, it's
1384 * a good bet that it's still available, and it's good to have some
1385 * affinity to a particular lock so that you don't unnecessarily bounce
1386 * cache lines between processes when there's no contention.
1387 *
1388 * If this is the first time through in this backend, pick a lock
1389 * (semi-)randomly. This allows the locks to be used evenly if you have a
1390 * lot of very short connections.
1391 */
1392 static int lockToTry = -1;
1393
1394 if (lockToTry == -1)
1395 lockToTry = MyProcNumber % NUM_XLOGINSERT_LOCKS;
1396 MyLockNo = lockToTry;
1397
1398 /*
1399 * The insertingAt value is initially set to 0, as we don't know our
1400 * insert location yet.
1401 */
1403 if (!immed)
1404 {
1405 /*
1406 * If we couldn't get the lock immediately, try another lock next
1407 * time. On a system with more insertion locks than concurrent
1408 * inserters, this causes all the inserters to eventually migrate to a
1409 * lock that no-one else is using. On a system with more inserters
1410 * than locks, it still helps to distribute the inserters evenly
1411 * across the locks.
1412 */
1413 lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
1414 }
1415}
ProcNumber MyProcNumber
Definition: globals.c:90
static int MyLockNo
Definition: xlog.c:652

References LW_EXCLUSIVE, LWLockAcquire(), MyLockNo, MyProcNumber, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateOverwriteContrecordRecord(), and XLogInsertRecord().

◆ WALInsertLockAcquireExclusive()

static void WALInsertLockAcquireExclusive ( void  )
static

Definition at line 1422 of file xlog.c.

1423{
1424 int i;
1425
1426 /*
1427 * When holding all the locks, all but the last lock's insertingAt
1428 * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
1429 * XLogRecPtr value, to make sure that no-one blocks waiting on those.
1430 */
1431 for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
1432 {
1437 }
1438 /* Variable value reset to 0 at release */
1440
1441 holdingAllLocks = true;
1442}
#define PG_UINT64_MAX
Definition: c.h:603
void LWLockUpdateVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1726
static bool holdingAllLocks
Definition: xlog.c:653

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LW_EXCLUSIVE, LWLockAcquire(), LWLockUpdateVar(), NUM_XLOGINSERT_LOCKS, PG_UINT64_MAX, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockRelease()

static void WALInsertLockRelease ( void  )
static

Definition at line 1451 of file xlog.c.

1452{
1453 if (holdingAllLocks)
1454 {
1455 int i;
1456
1457 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
1460 0);
1461
1462 holdingAllLocks = false;
1463 }
1464 else
1465 {
1468 0);
1469 }
1470}
void LWLockReleaseClearVar(LWLock *lock, pg_atomic_uint64 *valptr, uint64 val)
Definition: lwlock.c:1923

References holdingAllLocks, i, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockReleaseClearVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_abort_backup(), do_pg_backup_start(), do_pg_backup_stop(), UpdateFullPageWrites(), and XLogInsertRecord().

◆ WALInsertLockUpdateInsertingAt()

static void WALInsertLockUpdateInsertingAt ( XLogRecPtr  insertingAt)
static

Definition at line 1477 of file xlog.c.

1478{
1479 if (holdingAllLocks)
1480 {
1481 /*
1482 * We use the last lock to mark our actual position, see comments in
1483 * WALInsertLockAcquireExclusive.
1484 */
1487 insertingAt);
1488 }
1489 else
1492 insertingAt);
1493}

References holdingAllLocks, WALInsertLock::insertingAt, WALInsertLockPadded::l, LWLockUpdateVar(), MyLockNo, NUM_XLOGINSERT_LOCKS, and WALInsertLocks.

Referenced by GetXLogBuffer().

◆ WALReadFromBuffers()

Size WALReadFromBuffers ( char *  dstbuf,
XLogRecPtr  startptr,
Size  count,
TimeLineID  tli 
)

Definition at line 1754 of file xlog.c.

1756{
1757 char *pdst = dstbuf;
1758 XLogRecPtr recptr = startptr;
1759 XLogRecPtr inserted;
1760 Size nbytes = count;
1761
1763 return 0;
1764
1765 Assert(XLogRecPtrIsValid(startptr));
1766
1767 /*
1768 * Caller should ensure that the requested data has been inserted into WAL
1769 * buffers before we try to read it.
1770 */
1772 if (startptr + count > inserted)
1773 ereport(ERROR,
1774 errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
1775 LSN_FORMAT_ARGS(startptr + count),
1776 LSN_FORMAT_ARGS(inserted)));
1777
1778 /*
1779 * Loop through the buffers without a lock. For each buffer, atomically
1780 * read and verify the end pointer, then copy the data out, and finally
1781 * re-read and re-verify the end pointer.
1782 *
1783 * Once a page is evicted, it never returns to the WAL buffers, so if the
1784 * end pointer matches the expected end pointer before and after we copy
1785 * the data, then the right page must have been present during the data
1786 * copy. Read barriers are necessary to ensure that the data copy actually
1787 * happens between the two verification steps.
1788 *
1789 * If either verification fails, we simply terminate the loop and return
1790 * with the data that had been already copied out successfully.
1791 */
1792 while (nbytes > 0)
1793 {
1794 uint32 offset = recptr % XLOG_BLCKSZ;
1795 int idx = XLogRecPtrToBufIdx(recptr);
1796 XLogRecPtr expectedEndPtr;
1797 XLogRecPtr endptr;
1798 const char *page;
1799 const char *psrc;
1800 Size npagebytes;
1801
1802 /*
1803 * Calculate the end pointer we expect in the xlblocks array if the
1804 * correct page is present.
1805 */
1806 expectedEndPtr = recptr + (XLOG_BLCKSZ - offset);
1807
1808 /*
1809 * First verification step: check that the correct page is present in
1810 * the WAL buffers.
1811 */
1813 if (expectedEndPtr != endptr)
1814 break;
1815
1816 /*
1817 * The correct page is present (or was at the time the endptr was
1818 * read; must re-verify later). Calculate pointer to source data and
1819 * determine how much data to read from this page.
1820 */
1821 page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
1822 psrc = page + offset;
1823 npagebytes = Min(nbytes, XLOG_BLCKSZ - offset);
1824
1825 /*
1826 * Ensure that the data copy and the first verification step are not
1827 * reordered.
1828 */
1830
1831 /* data copy */
1832 memcpy(pdst, psrc, npagebytes);
1833
1834 /*
1835 * Ensure that the data copy and the second verification step are not
1836 * reordered.
1837 */
1839
1840 /*
1841 * Second verification step: check that the page we read from wasn't
1842 * evicted while we were copying the data.
1843 */
1845 if (expectedEndPtr != endptr)
1846 break;
1847
1848 pdst += npagebytes;
1849 recptr += npagebytes;
1850 nbytes -= npagebytes;
1851 }
1852
1853 Assert(pdst - dstbuf <= count);
1854
1855 return pdst - dstbuf;
1856}
#define pg_read_barrier()
Definition: atomics.h:154
#define Min(x, y)
Definition: c.h:1008
TimeLineID GetWALInsertionTimeLine(void)
Definition: xlog.c:6592

References Assert(), ereport, errmsg(), ERROR, GetWALInsertionTimeLine(), idx(), XLogCtlData::logInsertResult, LSN_FORMAT_ARGS, Min, XLogCtlData::pages, pg_atomic_read_u64(), pg_read_barrier, RecoveryInProgress(), XLogCtlData::xlblocks, XLogCtl, XLogRecPtrIsValid, and XLogRecPtrToBufIdx.

Referenced by XLogSendPhysical().

◆ WriteControlFile()

static void WriteControlFile ( void  )
static

Definition at line 4258 of file xlog.c.

4259{
4260 int fd;
4261 char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
4262
4263 /*
4264 * Initialize version and compatibility-check fields
4265 */
4268
4269 ControlFile->maxAlign = MAXIMUM_ALIGNOF;
4271
4272 ControlFile->blcksz = BLCKSZ;
4273 ControlFile->relseg_size = RELSEG_SIZE;
4275 ControlFile->xlog_blcksz = XLOG_BLCKSZ;
4277
4280
4283
4284 ControlFile->float8ByVal = true; /* vestigial */
4285
4286 /*
4287 * Initialize the default 'char' signedness.
4288 *
4289 * The signedness of the char type is implementation-defined. For instance
4290 * on x86 architecture CPUs, the char data type is typically treated as
4291 * signed by default, whereas on aarch architecture CPUs, it is typically
4292 * treated as unsigned by default. In v17 or earlier, we accidentally let
4293 * C implementation signedness affect persistent data. This led to
4294 * inconsistent results when comparing char data across different
4295 * platforms.
4296 *
4297 * This flag can be used as a hint to ensure consistent behavior for
4298 * pre-v18 data files that store data sorted by the 'char' type on disk,
4299 * especially in cross-platform replication scenarios.
4300 *
4301 * Newly created database clusters unconditionally set the default char
4302 * signedness to true. pg_upgrade changes this flag for clusters that were
4303 * initialized on signedness=false platforms. As a result,
4304 * signedness=false setting will become rare over time. If we had known
4305 * about this problem during the last development cycle that forced initdb
4306 * (v8.3), we would have made all clusters signed or all clusters
4307 * unsigned. Making pg_upgrade the only source of signedness=false will
4308 * cause the population of database clusters to converge toward that
4309 * retrospective ideal.
4310 */
4312
4313 /* Contents are protected with a CRC */
4317 offsetof(ControlFileData, crc));
4319
4320 /*
4321 * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
4322 * the excess over sizeof(ControlFileData). This reduces the odds of
4323 * premature-EOF errors when reading pg_control. We'll still fail when we
4324 * check the contents of the file, but hopefully with a more specific
4325 * error than "couldn't read pg_control".
4326 */
4327 memset(buffer, 0, PG_CONTROL_FILE_SIZE);
4328 memcpy(buffer, ControlFile, sizeof(ControlFileData));
4329
4331 O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
4332 if (fd < 0)
4333 ereport(PANIC,
4335 errmsg("could not create file \"%s\": %m",
4337
4338 errno = 0;
4339 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
4341 {
4342 /* if write didn't set errno, assume problem is no disk space */
4343 if (errno == 0)
4344 errno = ENOSPC;
4345 ereport(PANIC,
4347 errmsg("could not write to file \"%s\": %m",
4349 }
4351
4352 pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_SYNC);
4353 if (pg_fsync(fd) != 0)
4354 ereport(PANIC,
4356 errmsg("could not fsync file \"%s\": %m",
4359
4360 if (close(fd) != 0)
4361 ereport(PANIC,
4363 errmsg("could not close file \"%s\": %m",
4365}
#define PG_CONTROL_FILE_SIZE
Definition: pg_control.h:258

References BasicOpenFile(), ControlFileData::blcksz, CATALOG_VERSION_NO, ControlFileData::catalog_version_no, close, COMP_CRC32C, ControlFile, ControlFileData::crc, crc, ControlFileData::default_char_signedness, ereport, errcode_for_file_access(), errmsg(), fd(), FIN_CRC32C, ControlFileData::float8ByVal, ControlFileData::floatFormat, FLOATFORMAT_VALUE, INDEX_MAX_KEYS, ControlFileData::indexMaxKeys, INIT_CRC32C, ControlFileData::loblksize, LOBLKSIZE, ControlFileData::maxAlign, ControlFileData::nameDataLen, NAMEDATALEN, PANIC, PG_BINARY, PG_CONTROL_FILE_SIZE, PG_CONTROL_VERSION, ControlFileData::pg_control_version, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), ControlFileData::relseg_size, ControlFileData::slru_pages_per_segment, SLRU_PAGES_PER_SEGMENT, TOAST_MAX_CHUNK_SIZE, ControlFileData::toast_max_chunk_size, wal_segment_size, write, ControlFileData::xlog_blcksz, XLOG_CONTROL_FILE, and ControlFileData::xlog_seg_size.

Referenced by BootStrapXLOG().

◆ xlog_redo()

void xlog_redo ( XLogReaderState record)

Definition at line 8305 of file xlog.c.

8306{
8307 uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
8308 XLogRecPtr lsn = record->EndRecPtr;
8309
8310 /*
8311 * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
8312 * XLOG_FPI_FOR_HINT records.
8313 */
8314 Assert(info == XLOG_FPI || info == XLOG_FPI_FOR_HINT ||
8315 !XLogRecHasAnyBlockRefs(record));
8316
8317 if (info == XLOG_NEXTOID)
8318 {
8319 Oid nextOid;
8320
8321 /*
8322 * We used to try to take the maximum of TransamVariables->nextOid and
8323 * the recorded nextOid, but that fails if the OID counter wraps
8324 * around. Since no OID allocation should be happening during replay
8325 * anyway, better to just believe the record exactly. We still take
8326 * OidGenLock while setting the variable, just in case.
8327 */
8328 memcpy(&nextOid, XLogRecGetData(record), sizeof(Oid));
8329 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8330 TransamVariables->nextOid = nextOid;
8332 LWLockRelease(OidGenLock);
8333 }
8334 else if (info == XLOG_CHECKPOINT_SHUTDOWN)
8335 {
8336 CheckPoint checkPoint;
8337 TimeLineID replayTLI;
8338
8339 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8340 /* In a SHUTDOWN checkpoint, believe the counters exactly */
8341 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8342 TransamVariables->nextXid = checkPoint.nextXid;
8343 LWLockRelease(XidGenLock);
8344 LWLockAcquire(OidGenLock, LW_EXCLUSIVE);
8345 TransamVariables->nextOid = checkPoint.nextOid;
8347 LWLockRelease(OidGenLock);
8349 checkPoint.nextMultiOffset);
8350
8352 checkPoint.oldestMultiDB);
8353
8354 /*
8355 * No need to set oldestClogXid here as well; it'll be set when we
8356 * redo an xl_clog_truncate if it changed since initialization.
8357 */
8358 SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
8359
8360 /*
8361 * If we see a shutdown checkpoint while waiting for an end-of-backup
8362 * record, the backup was canceled and the end-of-backup record will
8363 * never arrive.
8364 */
8368 ereport(PANIC,
8369 (errmsg("online backup was canceled, recovery cannot continue")));
8370
8371 /*
8372 * If we see a shutdown checkpoint, we know that nothing was running
8373 * on the primary at this point. So fake-up an empty running-xacts
8374 * record and use that here and now. Recover additional standby state
8375 * for prepared transactions.
8376 */
8378 {
8379 TransactionId *xids;
8380 int nxids;
8381 TransactionId oldestActiveXID;
8382 TransactionId latestCompletedXid;
8384
8385 oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
8386
8387 /* Update pg_subtrans entries for any prepared transactions */
8389
8390 /*
8391 * Construct a RunningTransactions snapshot representing a shut
8392 * down server, with only prepared transactions still alive. We're
8393 * never overflowed at this point because all subxids are listed
8394 * with their parent prepared transactions.
8395 */
8396 running.xcnt = nxids;
8397 running.subxcnt = 0;
8399 running.nextXid = XidFromFullTransactionId(checkPoint.nextXid);
8400 running.oldestRunningXid = oldestActiveXID;
8401 latestCompletedXid = XidFromFullTransactionId(checkPoint.nextXid);
8402 TransactionIdRetreat(latestCompletedXid);
8403 Assert(TransactionIdIsNormal(latestCompletedXid));
8404 running.latestCompletedXid = latestCompletedXid;
8405 running.xids = xids;
8406
8408 }
8409
8410 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8411 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8413 LWLockRelease(ControlFileLock);
8414
8415 /*
8416 * We should've already switched to the new TLI before replaying this
8417 * record.
8418 */
8419 (void) GetCurrentReplayRecPtr(&replayTLI);
8420 if (checkPoint.ThisTimeLineID != replayTLI)
8421 ereport(PANIC,
8422 (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
8423 checkPoint.ThisTimeLineID, replayTLI)));
8424
8425 RecoveryRestartPoint(&checkPoint, record);
8426
8427 /*
8428 * After replaying a checkpoint record, free all smgr objects.
8429 * Otherwise we would never do so for dropped relations, as the
8430 * startup does not process shared invalidation messages or call
8431 * AtEOXact_SMgr().
8432 */
8434 }
8435 else if (info == XLOG_CHECKPOINT_ONLINE)
8436 {
8437 CheckPoint checkPoint;
8438 TimeLineID replayTLI;
8439
8440 memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
8441 /* In an ONLINE checkpoint, treat the XID counter as a minimum */
8442 LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
8444 checkPoint.nextXid))
8445 TransamVariables->nextXid = checkPoint.nextXid;
8446 LWLockRelease(XidGenLock);
8447
8448 /*
8449 * We ignore the nextOid counter in an ONLINE checkpoint, preferring
8450 * to track OID assignment through XLOG_NEXTOID records. The nextOid
8451 * counter is from the start of the checkpoint and might well be stale
8452 * compared to later XLOG_NEXTOID records. We could try to take the
8453 * maximum of the nextOid counter and our latest value, but since
8454 * there's no particular guarantee about the speed with which the OID
8455 * counter wraps around, that's a risky thing to do. In any case,
8456 * users of the nextOid counter are required to avoid assignment of
8457 * duplicates, so that a somewhat out-of-date value should be safe.
8458 */
8459
8460 /* Handle multixact */
8462 checkPoint.nextMultiOffset);
8463
8464 /*
8465 * NB: This may perform multixact truncation when replaying WAL
8466 * generated by an older primary.
8467 */
8469 checkPoint.oldestMultiDB);
8471 checkPoint.oldestXid))
8473 checkPoint.oldestXidDB);
8474 /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
8475 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8477 LWLockRelease(ControlFileLock);
8478
8479 /* TLI should not change in an on-line checkpoint */
8480 (void) GetCurrentReplayRecPtr(&replayTLI);
8481 if (checkPoint.ThisTimeLineID != replayTLI)
8482 ereport(PANIC,
8483 (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
8484 checkPoint.ThisTimeLineID, replayTLI)));
8485
8486 RecoveryRestartPoint(&checkPoint, record);
8487
8488 /*
8489 * After replaying a checkpoint record, free all smgr objects.
8490 * Otherwise we would never do so for dropped relations, as the
8491 * startup does not process shared invalidation messages or call
8492 * AtEOXact_SMgr().
8493 */
8495 }
8496 else if (info == XLOG_OVERWRITE_CONTRECORD)
8497 {
8498 /* nothing to do here, handled in xlogrecovery_redo() */
8499 }
8500 else if (info == XLOG_END_OF_RECOVERY)
8501 {
8502 xl_end_of_recovery xlrec;
8503 TimeLineID replayTLI;
8504
8505 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
8506
8507 /*
8508 * For Hot Standby, we could treat this like a Shutdown Checkpoint,
8509 * but this case is rarer and harder to test, so the benefit doesn't
8510 * outweigh the potential extra cost of maintenance.
8511 */
8512
8513 /*
8514 * We should've already switched to the new TLI before replaying this
8515 * record.
8516 */
8517 (void) GetCurrentReplayRecPtr(&replayTLI);
8518 if (xlrec.ThisTimeLineID != replayTLI)
8519 ereport(PANIC,
8520 (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
8521 xlrec.ThisTimeLineID, replayTLI)));
8522 }
8523 else if (info == XLOG_NOOP)
8524 {
8525 /* nothing to do here */
8526 }
8527 else if (info == XLOG_SWITCH)
8528 {
8529 /* nothing to do here */
8530 }
8531 else if (info == XLOG_RESTORE_POINT)
8532 {
8533 /* nothing to do here, handled in xlogrecovery.c */
8534 }
8535 else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
8536 {
8537 /*
8538 * XLOG_FPI records contain nothing else but one or more block
8539 * references. Every block reference must include a full-page image
8540 * even if full_page_writes was disabled when the record was generated
8541 * - otherwise there would be no point in this record.
8542 *
8543 * XLOG_FPI_FOR_HINT records are generated when a page needs to be
8544 * WAL-logged because of a hint bit update. They are only generated
8545 * when checksums and/or wal_log_hints are enabled. They may include
8546 * no full-page images if full_page_writes was disabled when they were
8547 * generated. In this case there is nothing to do here.
8548 *
8549 * No recovery conflicts are generated by these generic records - if a
8550 * resource manager needs to generate conflicts, it has to define a
8551 * separate WAL record type and redo routine.
8552 */
8553 for (uint8 block_id = 0; block_id <= XLogRecMaxBlockId(record); block_id++)
8554 {
8555 Buffer buffer;
8556
8557 if (!XLogRecHasBlockImage(record, block_id))
8558 {
8559 if (info == XLOG_FPI)
8560 elog(ERROR, "XLOG_FPI record did not contain a full-page image");
8561 continue;
8562 }
8563
8564 if (XLogReadBufferForRedo(record, block_id, &buffer) != BLK_RESTORED)
8565 elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
8566 UnlockReleaseBuffer(buffer);
8567 }
8568 }
8569 else if (info == XLOG_BACKUP_END)
8570 {
8571 /* nothing to do here, handled in xlogrecovery_redo() */
8572 }
8573 else if (info == XLOG_PARAMETER_CHANGE)
8574 {
8575 xl_parameter_change xlrec;
8576
8577 /* Update our copy of the parameters in pg_control */
8578 memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_parameter_change));
8579
8580 /*
8581 * Invalidate logical slots if we are in hot standby and the primary
8582 * does not have a WAL level sufficient for logical decoding. No need
8583 * to search for potentially conflicting logical slots if standby is
8584 * running with wal_level lower than logical, because in that case, we
8585 * would have either disallowed creation of logical slots or
8586 * invalidated existing ones.
8587 */
8588 if (InRecovery && InHotStandby &&
8589 xlrec.wal_level < WAL_LEVEL_LOGICAL &&
8592 0, InvalidOid,
8594
8595 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8603
8604 /*
8605 * Update minRecoveryPoint to ensure that if recovery is aborted, we
8606 * recover back up to this point before allowing hot standby again.
8607 * This is important if the max_* settings are decreased, to ensure
8608 * you don't run queries against the WAL preceding the change. The
8609 * local copies cannot be updated as long as crash recovery is
8610 * happening and we expect all the WAL to be replayed.
8611 */
8613 {
8616 }
8618 {
8619 TimeLineID replayTLI;
8620
8621 (void) GetCurrentReplayRecPtr(&replayTLI);
8623 ControlFile->minRecoveryPointTLI = replayTLI;
8624 }
8625
8629
8631 LWLockRelease(ControlFileLock);
8632
8633 /* Check to see if any parameter change gives a problem on recovery */
8635 }
8636 else if (info == XLOG_FPW_CHANGE)
8637 {
8638 bool fpw;
8639
8640 memcpy(&fpw, XLogRecGetData(record), sizeof(bool));
8641
8642 /*
8643 * Update the LSN of the last replayed XLOG_FPW_CHANGE record so that
8644 * do_pg_backup_start() and do_pg_backup_stop() can check whether
8645 * full_page_writes has been disabled during online backup.
8646 */
8647 if (!fpw)
8648 {
8653 }
8654
8655 /* Keep track of full_page_writes */
8656 lastFullPageWrites = fpw;
8657 }
8658 else if (info == XLOG_CHECKPOINT_REDO)
8659 {
8660 /* nothing to do here, just for informational purposes */
8661 }
8662}
int Buffer
Definition: buf.h:23
void UnlockReleaseBuffer(Buffer buffer)
Definition: bufmgr.c:5383
uint8_t uint8
Definition: c.h:541
void CommitTsParameterChange(bool newvalue, bool oldvalue)
Definition: commit_ts.c:640
void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
Definition: multixact.c:2466
void MultiXactAdvanceNextMXact(MultiXactId minMulti, MultiXactOffset minMultiOffset)
Definition: multixact.c:2441
#define XLOG_RESTORE_POINT
Definition: pg_control.h:75
#define XLOG_FPI
Definition: pg_control.h:79
#define XLOG_FPI_FOR_HINT
Definition: pg_control.h:78
#define XLOG_NEXTOID
Definition: pg_control.h:71
#define XLOG_NOOP
Definition: pg_control.h:70
#define XLOG_PARAMETER_CHANGE
Definition: pg_control.h:74
@ RS_INVAL_WAL_LEVEL
Definition: slot.h:66
void smgrdestroyall(void)
Definition: smgr.c:386
#define FullTransactionIdPrecedes(a, b)
Definition: transam.h:51
static bool TransactionIdPrecedes(TransactionId id1, TransactionId id2)
Definition: transam.h:263
static void RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
Definition: xlog.c:7619
@ WAL_LEVEL_LOGICAL
Definition: xlog.h:76
#define XLogRecGetInfo(decoder)
Definition: xlogreader.h:409
#define XLogRecGetData(decoder)
Definition: xlogreader.h:414
#define XLogRecMaxBlockId(decoder)
Definition: xlogreader.h:417
#define XLogRecHasBlockImage(decoder, block_id)
Definition: xlogreader.h:422
#define XLogRecHasAnyBlockRefs(decoder)
Definition: xlogreader.h:416
XLogRedoAction XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id, Buffer *buf)
Definition: xlogutils.c:303
@ STANDBY_INITIALIZED
Definition: xlogutils.h:53
#define InHotStandby
Definition: xlogutils.h:60
@ BLK_RESTORED
Definition: xlogutils.h:76

References ArchiveRecoveryRequested, Assert(), ControlFileData::backupEndPoint, ControlFileData::backupStartPoint, BLK_RESTORED, ControlFileData::checkPointCopy, CheckRequiredParameterValues(), CommitTsParameterChange(), ControlFile, elog, XLogReaderState::EndRecPtr, ereport, errmsg(), ERROR, FullTransactionIdPrecedes, GetCurrentReplayRecPtr(), InArchiveRecovery, XLogCtlData::info_lck, InHotStandby, InRecovery, InvalidateObsoleteReplicationSlots(), InvalidOid, InvalidTransactionId, XLogCtlData::lastFpwDisableRecPtr, lastFullPageWrites, RunningTransactionsData::latestCompletedXid, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, MultiXactAdvanceNextMXact(), MultiXactAdvanceOldest(), MultiXactSetNextMXact(), CheckPoint::nextMulti, CheckPoint::nextMultiOffset, TransamVariablesData::nextOid, CheckPoint::nextOid, TransamVariablesData::nextXid, CheckPoint::nextXid, RunningTransactionsData::nextXid, TransamVariablesData::oidCount, CheckPoint::oldestMulti, CheckPoint::oldestMultiDB, RunningTransactionsData::oldestRunningXid, TransamVariablesData::oldestXid, CheckPoint::oldestXid, CheckPoint::oldestXidDB, PANIC, PrescanPreparedTransactions(), ProcArrayApplyRecoveryInfo(), XLogReaderState::ReadRecPtr, RecoveryRestartPoint(), RS_INVAL_WAL_LEVEL, SetTransactionIdLimit(), smgrdestroyall(), SpinLockAcquire, SpinLockRelease, STANDBY_INITIALIZED, StandbyRecoverPreparedTransactions(), standbyState, RunningTransactionsData::subxcnt, RunningTransactionsData::subxid_status, 
SUBXIDS_IN_SUBTRANS, xl_end_of_recovery::ThisTimeLineID, CheckPoint::ThisTimeLineID, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, TransactionIdIsNormal, TransactionIdPrecedes(), TransactionIdRetreat, TransamVariables, UnlockReleaseBuffer(), UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, WAL_LEVEL_LOGICAL, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, RunningTransactionsData::xcnt, XidFromFullTransactionId, RunningTransactionsData::xids, XLOG_BACKUP_END, XLOG_CHECKPOINT_ONLINE, XLOG_CHECKPOINT_REDO, XLOG_CHECKPOINT_SHUTDOWN, XLOG_END_OF_RECOVERY, XLOG_FPI, XLOG_FPI_FOR_HINT, XLOG_FPW_CHANGE, XLOG_NEXTOID, XLOG_NOOP, XLOG_OVERWRITE_CONTRECORD, XLOG_PARAMETER_CHANGE, XLOG_RESTORE_POINT, XLOG_SWITCH, XLogCtl, XLogReadBufferForRedo(), XLogRecGetData, XLogRecGetInfo, XLogRecHasAnyBlockRefs, XLogRecHasBlockImage, XLogRecMaxBlockId, and XLogRecPtrIsValid.

◆ XLogBackgroundFlush()

bool XLogBackgroundFlush ( void  )

Definition at line 2978 of file xlog.c.

2979{
2980 XLogwrtRqst WriteRqst;
2981 bool flexible = true;
2982 static TimestampTz lastflush;
2984 int flushblocks;
2985 TimeLineID insertTLI;
2986
2987 /* XLOG doesn't need flushing during recovery */
2988 if (RecoveryInProgress())
2989 return false;
2990
2991 /*
2992 * Since we're not in recovery, InsertTimeLineID is set and can't change,
2993 * so we can read it without a lock.
2994 */
2995 insertTLI = XLogCtl->InsertTimeLineID;
2996
2997 /* read updated LogwrtRqst */
2999 WriteRqst = XLogCtl->LogwrtRqst;
3001
3002 /* back off to last completed page boundary */
3003 WriteRqst.Write -= WriteRqst.Write % XLOG_BLCKSZ;
3004
3005 /* if we have already flushed that far, consider async commit records */
3007 if (WriteRqst.Write <= LogwrtResult.Flush)
3008 {
3010 WriteRqst.Write = XLogCtl->asyncXactLSN;
3012 flexible = false; /* ensure it all gets written */
3013 }
3014
3015 /*
3016 * If already known flushed, we're done. Just need to check if we are
3017 * holding an open file handle to a logfile that's no longer in use,
3018 * preventing the file from being deleted.
3019 */
3020 if (WriteRqst.Write <= LogwrtResult.Flush)
3021 {
3022 if (openLogFile >= 0)
3023 {
3026 {
3027 XLogFileClose();
3028 }
3029 }
3030 return false;
3031 }
3032
3033 /*
3034 * Determine how far to flush WAL, based on the wal_writer_delay and
3035 * wal_writer_flush_after GUCs.
3036 *
3037 * Note that XLogSetAsyncXactLSN() performs a similar calculation based on
3038 * wal_writer_flush_after, to decide when to wake us up. Make sure the
3039 * logic is the same in both places if you change this.
3040 */
3042 flushblocks =
3043 WriteRqst.Write / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
3044
3045 if (WalWriterFlushAfter == 0 || lastflush == 0)
3046 {
3047 /* first call, or block based limits disabled */
3048 WriteRqst.Flush = WriteRqst.Write;
3049 lastflush = now;
3050 }
3051 else if (TimestampDifferenceExceeds(lastflush, now, WalWriterDelay))
3052 {
3053 /*
3054 * Flush the writes at least every WalWriterDelay ms. This is
3055 * important to bound the amount of time it takes for an asynchronous
3056 * commit to hit disk.
3057 */
3058 WriteRqst.Flush = WriteRqst.Write;
3059 lastflush = now;
3060 }
3061 else if (flushblocks >= WalWriterFlushAfter)
3062 {
3063 /* exceeded wal_writer_flush_after blocks, flush */
3064 WriteRqst.Flush = WriteRqst.Write;
3065 lastflush = now;
3066 }
3067 else
3068 {
3069 /* no flushing, this time round */
3070 WriteRqst.Flush = 0;
3071 }
3072
3073#ifdef WAL_DEBUG
3074 if (XLOG_DEBUG)
3075 elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
3076 LSN_FORMAT_ARGS(WriteRqst.Write),
3077 LSN_FORMAT_ARGS(WriteRqst.Flush),
3080#endif
3081
3083
3084 /* now wait for any in-progress insertions to finish and get write lock */
3086 LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
3088 if (WriteRqst.Write > LogwrtResult.Write ||
3089 WriteRqst.Flush > LogwrtResult.Flush)
3090 {
3091 XLogWrite(WriteRqst, insertTLI, flexible);
3092 }
3093 LWLockRelease(WALWriteLock);
3094
3096
3097 /* wake up walsenders now that we've released heavily contended locks */
3099
3100 /*
3101 * Great, done. To take some work off the critical path, try to initialize
3102 * as many of the no-longer-needed WAL buffers for future use as we can.
3103 */
3104 AdvanceXLInsertBuffer(InvalidXLogRecPtr, insertTLI, true);
3105
3106 /*
3107 * If we determined that we need to write data, but somebody else
3108 * wrote/flushed already, it should be considered as being active, to
3109 * avoid hibernating too early.
3110 */
3111 return true;
3112}
bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec)
Definition: timestamp.c:1781
Datum now(PG_FUNCTION_ARGS)
Definition: timestamp.c:1609
XLogRecPtr asyncXactLSN
Definition: xlog.c:459
static void WalSndWakeupProcessRequests(bool physical, bool logical)
Definition: walsender.h:65
int WalWriterFlushAfter
Definition: walwriter.c:71
int WalWriterDelay
Definition: walwriter.c:70
#define XLByteInPrevSeg(xlrp, logSegNo, wal_segsz_bytes)

References AdvanceXLInsertBuffer(), XLogCtlData::asyncXactLSN, elog, END_CRIT_SECTION, XLogwrtRqst::Flush, XLogwrtResult::Flush, GetCurrentTimestamp(), XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), now(), openLogFile, openLogSegNo, RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, TimestampDifferenceExceeds(), WaitXLogInsertionsToFinish(), wal_segment_size, WalSndWakeupProcessRequests(), WalWriterDelay, WalWriterFlushAfter, XLogwrtRqst::Write, XLogwrtResult::Write, XLByteInPrevSeg, XLogCtl, XLogFileClose(), and XLogWrite().

Referenced by WalSndWaitForWal() and WalWriterMain().

◆ XLogBytePosToEndRecPtr()

static XLogRecPtr XLogBytePosToEndRecPtr ( uint64  bytepos)
static

Definition at line 1904 of file xlog.c.

1905{
     /*
      * Convert a byte position into an XLogRecPtr, treating the position as
      * the END of some data.
      *
      * bytepos appears to count only "usable" WAL bytes, i.e. with page
      * headers left out, judging by how the long/short page header sizes are
      * added back below -- TODO confirm against the callers.
      *
      * Compared with XLogBytePosToRecPtr, the only difference is at exact
      * page boundaries: when no bytes are left over on the page, no header
      * size is added, so the result is the page (or segment) boundary itself
      * instead of the first byte after the following page's header.
      *
      * Returns the pointer built by XLogSegNoOffsetToRecPtr from the segment
      * number and the in-segment offset computed here.
      */
1906 uint64 fullsegs;
1907 uint64 fullpages;
1908 uint64 bytesleft;
1909 uint32 seg_offset;
1910 XLogRecPtr result;
1911
     /* split into whole segments and the remainder within the last one */
1912 fullsegs = bytepos / UsableBytesInSegment;
1913 bytesleft = bytepos % UsableBytesInSegment;
1914
1915 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1916 {
1917 /* fits on first page of segment */
     /* an empty remainder maps to the segment start, with no header skipped */
1918 if (bytesleft == 0)
1919 seg_offset = 0;
1920 else
1921 seg_offset = bytesleft + SizeOfXLogLongPHD;
1922 }
1923 else
1924 {
1925 /* account for the first page on segment with long header */
1926 seg_offset = XLOG_BLCKSZ;
1927 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1928
     /* the remaining pages each lose SizeOfXLogShortPHD bytes to a header */
1929 fullpages = bytesleft / UsableBytesInPage;
1930 bytesleft = bytesleft % UsableBytesInPage;
1931
     /*
      * bytesleft is zero in the first branch, so adding it is a no-op: the
      * offset lands exactly on the page boundary and no short header is
      * skipped.
      */
1932 if (bytesleft == 0)
1933 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft;
1934 else
1935 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1936 }
1937
     /* macro: combines segment number and offset, storing into result */
1938 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1939
1940 return result;
1941}
#define XLogSegNoOffsetToRecPtr(segno, offset, wal_segsz_bytes, dest)

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and WaitXLogInsertionsToFinish().

◆ XLogBytePosToRecPtr()

static XLogRecPtr XLogBytePosToRecPtr ( uint64  bytepos)
static

Definition at line 1864 of file xlog.c.

1865{
     /*
      * Convert a byte position into an XLogRecPtr.
      *
      * bytepos appears to count only "usable" WAL bytes, i.e. with page
      * headers left out, judging by how the long/short page header sizes are
      * added back below -- TODO confirm against the callers.
      *
      * A header size is always added, even when the remainder on the page is
      * zero, so a position that falls on a page boundary maps to the first
      * byte after that page's header. XLogBytePosToEndRecPtr differs exactly
      * in that case.
      *
      * Returns the pointer built by XLogSegNoOffsetToRecPtr from the segment
      * number and the in-segment offset computed here.
      */
1866 uint64 fullsegs;
1867 uint64 fullpages;
1868 uint64 bytesleft;
1869 uint32 seg_offset;
1870 XLogRecPtr result;
1871
     /* split into whole segments and the remainder within the last one */
1872 fullsegs = bytepos / UsableBytesInSegment;
1873 bytesleft = bytepos % UsableBytesInSegment;
1874
1875 if (bytesleft < XLOG_BLCKSZ - SizeOfXLogLongPHD)
1876 {
1877 /* fits on first page of segment */
1878 seg_offset = bytesleft + SizeOfXLogLongPHD;
1879 }
1880 else
1881 {
1882 /* account for the first page on segment with long header */
1883 seg_offset = XLOG_BLCKSZ;
1884 bytesleft -= XLOG_BLCKSZ - SizeOfXLogLongPHD;
1885
     /* the remaining pages each lose SizeOfXLogShortPHD bytes to a header */
1886 fullpages = bytesleft / UsableBytesInPage;
1887 bytesleft = bytesleft % UsableBytesInPage;
1888
     /* skip the whole pages, then the short header of the target page */
1889 seg_offset += fullpages * XLOG_BLCKSZ + bytesleft + SizeOfXLogShortPHD;
1890 }
1891
     /* macro: combines segment number and offset, storing into result */
1892 XLogSegNoOffsetToRecPtr(fullsegs, seg_offset, wal_segment_size, result);
1893
1894 return result;
1895}

References SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, and XLogSegNoOffsetToRecPtr.

Referenced by CreateCheckPoint(), GetXLogInsertRecPtr(), ReserveXLogInsertLocation(), and ReserveXLogSwitch().

◆ XLogCheckpointNeeded()

bool XLogCheckpointNeeded ( XLogSegNo  new_segno)

Definition at line 2283 of file xlog.c.

2284{
2285 XLogSegNo old_segno;
2286
2287 XLByteToSeg(RedoRecPtr, old_segno, wal_segment_size);
2288
2289 if (new_segno >= old_segno + (uint64) (CheckPointSegments - 1))
2290 return true;
2291 return false;
2292}

References CheckPointSegments, RedoRecPtr, wal_segment_size, and XLByteToSeg.

Referenced by XLogPageRead(), and XLogWrite().

◆ XLOGChooseNumBuffers()

static int XLOGChooseNumBuffers ( void  )
static

Definition at line 4675 of file xlog.c.

4676{
4677 int xbuffers;
4678
4679 xbuffers = NBuffers / 32;
4680 if (xbuffers > (wal_segment_size / XLOG_BLCKSZ))
4681 xbuffers = (wal_segment_size / XLOG_BLCKSZ);
4682 if (xbuffers < 8)
4683 xbuffers = 8;
4684 return xbuffers;
4685}

References NBuffers, and wal_segment_size.

Referenced by check_wal_buffers(), and XLOGShmemSize().

◆ XLogFileClose()

static void XLogFileClose ( void  )
static

Definition at line 3658 of file xlog.c.

3659{
3660 Assert(openLogFile >= 0);
3661
3662 /*
3663 * WAL segment files will not be re-read in normal operation, so we advise
3664 * the OS to release any cached pages. But do not do so if WAL archiving
3665 * or streaming is active, because archiver and walsender process could
3666 * use the cache to read the WAL segment.
3667 */
3668#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
3669 if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
3670 (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
3671#endif
3672
3673 if (close(openLogFile) != 0)
3674 {
3675 char xlogfname[MAXFNAMELEN];
3676 int save_errno = errno;
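3677
3678 XLogFileName(xlogfname, openLogTLI, openLogSegNo, wal_segment_size);
3679 errno = save_errno;
3680 ereport(PANIC,
3681 (errcode_for_file_access(),
3682 errmsg("could not close file \"%s\": %m", xlogfname)));
3683 }
3684
3685 openLogFile = -1;
3686 ReleaseExternalFD();
3687}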
void ReleaseExternalFD(void)
Definition: fd.c:1238

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), io_direct_flags, IO_DIRECT_WAL, MAXFNAMELEN, openLogFile, openLogSegNo, openLogTLI, PANIC, ReleaseExternalFD(), wal_segment_size, XLogFileName(), and XLogIsNeeded.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), and XLogWrite().

◆ XLogFileCopy()

static void XLogFileCopy ( TimeLineID  destTLI,
XLogSegNo  destsegno,
TimeLineID  srcTLI,
XLogSegNo  srcsegno,
int  upto 
)
static

Definition at line 3437 of file xlog.c.

3440{
3441 char path[MAXPGPATH];
3442 char tmppath[MAXPGPATH];
3443 PGAlignedXLogBlock buffer;
3444 int srcfd;
3445 int fd;
3446 int nbytes;
3447
3448 /*
3449 * Open the source file
3450 */
3451 XLogFilePath(path, srcTLI, srcsegno, wal_segment_size);
3452 srcfd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
3453 if (srcfd < 0)
3454 ereport(ERROR,
3455 (errcode_for_file_access(),
3456 errmsg("could not open file \"%s\": %m", path)));
3457
3458 /*
3459 * Copy into a temp file name.
3460 */
3461 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3462
3463 unlink(tmppath);
3464
3465 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3466 fd = OpenTransientFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
3467 if (fd < 0)
3468 ereport(ERROR,
3469 (errcode_for_file_access(),
3470 errmsg("could not create file \"%s\": %m", tmppath)));
3471
3472 /*
3473 * Do the data copying.
3474 */
3475 for (nbytes = 0; nbytes < wal_segment_size; nbytes += sizeof(buffer))
3476 {
3477 int nread;
3478
3479 nread = upto - nbytes;
3480
3481 /*
3482 * The part that is not read from the source file is filled with
3483 * zeros.
3484 */
3485 if (nread < sizeof(buffer))
3486 memset(buffer.data, 0, sizeof(buffer));
3487
3488 if (nread > 0)
3489 {
3490 int r;
3491
3492 if (nread > sizeof(buffer))
3493 nread = sizeof(buffer);
3494 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
3495 r = read(srcfd, buffer.data, nread);
3496 if (r != nread)
3497 {
3498 if (r < 0)
3499 ereport(ERROR,
3500 (errcode_for_file_access(),
3501 errmsg("could not read file \"%s\": %m",
3502 path)));
3503 else
3504 ereport(ERROR,
3505 (errcode(ERRCODE_DATA_CORRUPTED),
3506 errmsg("could not read file \"%s\": read %d of %zu",
3507 path, r, (Size) nread)));
3508 }
3509 pgstat_report_wait_end();
3510 }
3511 errno = 0;
3512 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
3513 if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
3514 {
3515 int save_errno = errno;
3516
3517 /*
3518 * If we fail to make the file, delete it to release disk space
3519 */
3520 unlink(tmppath);
3521 /* if write didn't set errno, assume problem is no disk space */
3522 errno = save_errno ? save_errno : ENOSPC;
3523
3524 ereport(ERROR,
3525 (errcode_for_file_access(),
3526 errmsg("could not write to file \"%s\": %m", tmppath)));
3527 }
3528 pgstat_report_wait_end();
3529 }
3530
3531 pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
3532 if (pg_fsync(fd) != 0)
3533 ereport(data_sync_elevel(ERROR),
3534 (errcode_for_file_access(),
3535 errmsg("could not fsync file \"%s\": %m", tmppath)));
3536 pgstat_report_wait_end();
3537
3538 if (CloseTransientFile(fd) != 0)
3539 ereport(ERROR,
3540 (errcode_for_file_access(),
3541 errmsg("could not close file \"%s\": %m", tmppath)));
3542
3543 if (CloseTransientFile(srcfd) != 0)
3544 ereport(ERROR,
3545 (errcode_for_file_access(),
3546 errmsg("could not close file \"%s\": %m", path)));
3547
3548 /*
3549 * Now move the segment into place with its final name.
3550 */
3551 if (!InstallXLogFileSegment(&destsegno, tmppath, false, 0, destTLI))
3552 elog(ERROR, "InstallXLogFileSegment should not have failed");
3553}
int CloseTransientFile(int fd)
Definition: fd.c:2868
int data_sync_elevel(int elevel)
Definition: fd.c:3998
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2691
char data[XLOG_BLCKSZ]
Definition: c.h:1140

References CloseTransientFile(), PGAlignedXLogBlock::data, data_sync_elevel(), elog, ereport, errcode(), ERRCODE_DATA_CORRUPTED, errcode_for_file_access(), errmsg(), ERROR, fd(), InstallXLogFileSegment(), MAXPGPATH, OpenTransientFile(), PG_BINARY, pg_fsync(), pgstat_report_wait_end(), pgstat_report_wait_start(), read, snprintf, wal_segment_size, write, XLOGDIR, and XLogFilePath().

Referenced by XLogInitNewTimeline().

◆ XLogFileInit()

int XLogFileInit ( XLogSegNo  logsegno,
TimeLineID  logtli 
)

Definition at line 3399 of file xlog.c.

3400{
3401 bool ignore_added;
3402 char path[MAXPGPATH];
3403 int fd;
3404
3405 Assert(logtli != 0);
3406
3407 fd = XLogFileInitInternal(logsegno, logtli, &ignore_added, path);
3408 if (fd >= 0)
3409 return fd;
3410
3411 /* Now open original target segment (might not be file I just made) */
3412 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3413 get_sync_bit(wal_sync_method));
3414 if (fd < 0)
3415 ereport(ERROR,
3416 (errcode_for_file_access(),
3417 errmsg("could not open file \"%s\": %m", path)));
3418 return fd;
3419}
#define O_CLOEXEC
Definition: win32_port.h:349

References Assert(), BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PG_BINARY, wal_sync_method, and XLogFileInitInternal().

Referenced by BootStrapXLOG(), XLogInitNewTimeline(), XLogWalRcvWrite(), and XLogWrite().

◆ XLogFileInitInternal()

static int XLogFileInitInternal ( XLogSegNo  logsegno,
TimeLineID  logtli,
bool *  added,
char *  path 
)
static

Definition at line 3211 of file xlog.c.

3213{
3214 char tmppath[MAXPGPATH];
3215 XLogSegNo installed_segno;
3216 XLogSegNo max_segno;
3217 int fd;
3218 int save_errno;
3219 int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
3220 instr_time io_start;
3221
3222 Assert(logtli != 0);
3223
3224 XLogFilePath(path, logtli, logsegno, wal_segment_size);
3225
3226 /*
3227 * Try to use existent file (checkpoint maker may have created it already)
3228 */
3229 *added = false;
3230 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3231 get_sync_bit(wal_sync_method));
3232 if (fd < 0)
3233 {
3234 if (errno != ENOENT)
3235 ereport(ERROR,
3236 (errcode_for_file_access(),
3237 errmsg("could not open file \"%s\": %m", path)));
3238 }
3239 else
3240 return fd;
3241
3242 /*
3243 * Initialize an empty (all zeroes) segment. NOTE: it is possible that
3244 * another process is doing the same thing. If so, we will end up
3245 * pre-creating an extra log segment. That seems OK, and better than
3246 * holding the lock throughout this lengthy process.
3247 */
3248 elog(DEBUG2, "creating and filling new WAL file");
3249
3250 snprintf(tmppath, MAXPGPATH, XLOGDIR "/xlogtemp.%d", (int) getpid());
3251
3252 unlink(tmppath);
3253
3254 if (io_direct_flags & IO_DIRECT_WAL_INIT)
3255 open_flags |= PG_O_DIRECT;
3256
3257 /* do not use get_sync_bit() here --- want to fsync only at end of fill */
3258 fd = BasicOpenFile(tmppath, open_flags);
3259 if (fd < 0)
3260 ereport(ERROR,
3261 (errcode_for_file_access(),
3262 errmsg("could not create file \"%s\": %m", tmppath)));
3263
3264 /* Measure I/O timing when initializing segment */
3265 io_start = pgstat_prepare_io_time(track_wal_io_timing);
3266
3267 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
3268 save_errno = 0;
3269 if (wal_init_zero)
3270 {
3271 ssize_t rc;
3272
3273 /*
3274 * Zero-fill the file. With this setting, we do this the hard way to
3275 * ensure that all the file space has really been allocated. On
3276 * platforms that allow "holes" in files, just seeking to the end
3277 * doesn't allocate intermediate space. This way, we know that we
3278 * have all the space and (after the fsync below) that all the
3279 * indirect blocks are down on disk. Therefore, fdatasync(2) or
3280 * O_DSYNC will be sufficient to sync future writes to the log file.
3281 */
3282 rc = pg_pwrite_zeros(fd, wal_segment_size, 0);
3283
3284 if (rc < 0)
3285 save_errno = errno;
3286 }
3287 else
3288 {
3289 /*
3290 * Otherwise, seeking to the end and writing a solitary byte is
3291 * enough.
3292 */
3293 errno = 0;
3294 if (pg_pwrite(fd, "\0", 1, wal_segment_size - 1) != 1)
3295 {
3296 /* if write didn't set errno, assume no disk space */
3297 save_errno = errno ? errno : ENOSPC;
3298 }
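3299 }
3300 pgstat_report_wait_end();
3301
3302 /*
3303 * A full segment worth of data is written when using wal_init_zero. One
3304 * byte is written when not using it.
3305 */
3306 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT, IOOP_WRITE,
3307 io_start, 1,
3308 wal_init_zero ? wal_segment_size : 1);
3309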
3310 if (save_errno)
3311 {
3312 /*
3313 * If we fail to make the file, delete it to release disk space
3314 */
3315 unlink(tmppath);
3316
3317 close(fd);
3318
3319 errno = save_errno;
3320
3321 ereport(ERROR,
3322 (errcode_for_file_access(),
3323 errmsg("could not write to file \"%s\": %m", tmppath)));
3324 }
3325
3326 /* Measure I/O timing when flushing segment */
3327 io_start = pgstat_prepare_io_time(track_wal_io_timing);
3328
3329 pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
3330 if (pg_fsync(fd) != 0)
3331 {
3332 save_errno = errno;
3333 close(fd);
3334 errno = save_errno;
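3335 ereport(ERROR,
3336 (errcode_for_file_access(),
3337 errmsg("could not fsync file \"%s\": %m", tmppath)));
3338 }
3339 pgstat_report_wait_end();
3340
3341 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_INIT,
3342 IOOP_FSYNC, io_start, 1, 0);
3343
3344 if (close(fd) != 0)
3345 ereport(ERROR,
3346 (errcode_for_file_access(),
3347 errmsg("could not close file \"%s\": %m", tmppath)));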
3348
3349 /*
3350 * Now move the segment into place with its final name. Cope with
3351 * possibility that someone else has created the file while we were
3352 * filling ours: if so, use ours to pre-create a future log segment.
3353 */
3354 installed_segno = logsegno;
3355
3356 /*
3357 * XXX: What should we use as max_segno? We used to use XLOGfileslop when
3358 * that was a constant, but that was always a bit dubious: normally, at a
3359 * checkpoint, XLOGfileslop was the offset from the checkpoint record, but
3360 * here, it was the offset from the insert location. We can't do the
3361 * normal XLOGfileslop calculation here because we don't have access to
3362 * the prior checkpoint's redo location. So somewhat arbitrarily, just use
3363 * CheckPointSegments.
3364 */
3365 max_segno = logsegno + CheckPointSegments;
3366 if (InstallXLogFileSegment(&installed_segno, tmppath, true, max_segno,
3367 logtli))
3368 {
3369 *added = true;
3370 elog(DEBUG2, "done creating and filling new WAL file");
3371 }
3372 else
3373 {
3374 /*
3375 * No need for any more future segments, or InstallXLogFileSegment()
3376 * failed to rename the file into place. If the rename failed, a
3377 * caller opening the file may fail.
3378 */
3379 unlink(tmppath);
3380 elog(DEBUG2, "abandoned new WAL file");
3381 }
3382
3383 return -1;
3384}
#define IO_DIRECT_WAL_INIT
Definition: fd.h:56
ssize_t pg_pwrite_zeros(int fd, size_t size, pgoff_t offset)
Definition: file_utils.c:709
@ IOCONTEXT_INIT
Definition: pgstat.h:288
@ IOOP_WRITE
Definition: pgstat.h:316
#define pg_pwrite
Definition: port.h:227
bool wal_init_zero
Definition: xlog.c:129

References Assert(), BasicOpenFile(), CheckPointSegments, close, DEBUG2, elog, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), get_sync_bit(), InstallXLogFileSegment(), io_direct_flags, IO_DIRECT_WAL_INIT, IOCONTEXT_INIT, IOOBJECT_WAL, IOOP_FSYNC, IOOP_WRITE, MAXPGPATH, O_CLOEXEC, PG_BINARY, pg_fsync(), PG_O_DIRECT, pg_pwrite, pg_pwrite_zeros(), pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), snprintf, track_wal_io_timing, wal_init_zero, wal_segment_size, wal_sync_method, XLOGDIR, and XLogFilePath().

Referenced by PreallocXlogFiles(), and XLogFileInit().

◆ XLogFileOpen()

int XLogFileOpen ( XLogSegNo  segno,
TimeLineID  tli 
)

Definition at line 3637 of file xlog.c.

3638{
3639 char path[MAXPGPATH];
3640 int fd;
3641
3642 XLogFilePath(path, tli, segno, wal_segment_size);
3643
3644 fd = BasicOpenFile(path, O_RDWR | PG_BINARY | O_CLOEXEC |
3645 get_sync_bit(wal_sync_method));
3646 if (fd < 0)
3647 ereport(PANIC,
3648 (errcode_for_file_access(),
3649 errmsg("could not open file \"%s\": %m", path)));
3650
3651 return fd;
3652}

References BasicOpenFile(), ereport, errcode_for_file_access(), errmsg(), fd(), get_sync_bit(), MAXPGPATH, O_CLOEXEC, PANIC, PG_BINARY, wal_segment_size, wal_sync_method, and XLogFilePath().

Referenced by XLogWrite().

◆ XLOGfileslop()

static XLogSegNo XLOGfileslop ( XLogRecPtr  lastredoptr)
static

Definition at line 2233 of file xlog.c.

2234{
2235 XLogSegNo minSegNo;
2236 XLogSegNo maxSegNo;
2237 double distance;
2238 XLogSegNo recycleSegNo;
2239
2240 /*
2241 * Calculate the segment numbers that min_wal_size_mb and max_wal_size_mb
2242 * correspond to. Always recycle enough segments to meet the minimum, and
2243 * remove enough segments to stay below the maximum.
2244 */
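2245 minSegNo = lastredoptr / wal_segment_size +
2246 ConvertToXSegs(min_wal_size_mb, wal_segment_size) - 1;
2247 maxSegNo = lastredoptr / wal_segment_size +
2248 ConvertToXSegs(max_wal_size_mb, wal_segment_size) - 1;
2249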
2250 /*
2251 * Between those limits, recycle enough segments to get us through to the
2252 * estimated end of next checkpoint.
2253 *
2254 * To estimate where the next checkpoint will finish, assume that the
2255 * system runs steadily consuming CheckPointDistanceEstimate bytes between
2256 * every checkpoint.
2257 */
2258 distance = (1.0 + CheckPointCompletionTarget) * CheckPointDistanceEstimate;
2259 /* add 10% for good measure. */
2260 distance *= 1.10;
2261
2262 recycleSegNo = (XLogSegNo) ceil(((double) lastredoptr + distance) /
2263 wal_segment_size);
2264
2265 if (recycleSegNo < minSegNo)
2266 recycleSegNo = minSegNo;
2267 if (recycleSegNo > maxSegNo)
2268 recycleSegNo = maxSegNo;
2269
2270 return recycleSegNo;
2271}

References CheckPointCompletionTarget, CheckPointDistanceEstimate, ConvertToXSegs, max_wal_size_mb, min_wal_size_mb, and wal_segment_size.

Referenced by RemoveOldXlogFiles().

◆ XLogFlush()

void XLogFlush ( XLogRecPtr  record)

Definition at line 2783 of file xlog.c.

2784{
2785 XLogRecPtr WriteRqstPtr;
2786 XLogwrtRqst WriteRqst;
2787 TimeLineID insertTLI = XLogCtl->InsertTimeLineID;
2788
2789 /*
2790 * During REDO, we are reading not writing WAL. Therefore, instead of
2791 * trying to flush the WAL, we should update minRecoveryPoint instead. We
2792 * test XLogInsertAllowed(), not InRecovery, because we need checkpointer
2793 * to act this way too, and because when it tries to write the
2794 * end-of-recovery checkpoint, it should indeed flush.
2795 */
2796 if (!XLogInsertAllowed())
2797 {
2798 UpdateMinRecoveryPoint(record, false);
2799 return;
2800 }
2801
2802 /* Quick exit if already known flushed */
2803 if (record <= LogwrtResult.Flush)
2804 return;
2805
2806#ifdef WAL_DEBUG
2807 if (XLOG_DEBUG)
2808 elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
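2809 LSN_FORMAT_ARGS(record),
2810 LSN_FORMAT_ARGS(LogwrtResult.Write),
2811 LSN_FORMAT_ARGS(LogwrtResult.Flush));
2812#endif
2813
2814 START_CRIT_SECTION();
2815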
2816 /*
2817 * Since fsync is usually a horribly expensive operation, we try to
2818 * piggyback as much data as we can on each fsync: if we see any more data
2819 * entered into the xlog buffer, we'll write and fsync that too, so that
2820 * the final value of LogwrtResult.Flush is as large as possible. This
2821 * gives us some chance of avoiding another fsync immediately after.
2822 */
2823
2824 /* initialize to given target; may increase below */
2825 WriteRqstPtr = record;
2826
2827 /*
2828 * Now wait until we get the write lock, or someone else does the flush
2829 * for us.
2830 */
2831 for (;;)
2832 {
2833 XLogRecPtr insertpos;
2834
2835 /* done already? */
2836 RefreshXLogWriteResult(LogwrtResult);
2837 if (record <= LogwrtResult.Flush)
2838 break;
2839
2840 /*
2841 * Before actually performing the write, wait for all in-flight
2842 * insertions to the pages we're about to write to finish.
2843 */
2844 SpinLockAcquire(&XLogCtl->info_lck);
2845 if (WriteRqstPtr < XLogCtl->LogwrtRqst.Write)
2846 WriteRqstPtr = XLogCtl->LogwrtRqst.Write;
2847 SpinLockRelease(&XLogCtl->info_lck);
2848 insertpos = WaitXLogInsertionsToFinish(WriteRqstPtr);
2849
2850 /*
2851 * Try to get the write lock. If we can't get it immediately, wait
2852 * until it's released, and recheck if we still need to do the flush
2853 * or if the backend that held the lock did it for us already. This
2854 * helps to maintain a good rate of group committing when the system
2855 * is bottlenecked by the speed of fsyncing.
2856 */
2857 if (!LWLockAcquireOrWait(WALWriteLock, LW_EXCLUSIVE))
2858 {
2859 /*
2860 * The lock is now free, but we didn't acquire it yet. Before we
2861 * do, loop back to check if someone else flushed the record for
2862 * us already.
2863 */
2864 continue;
2865 }
2866
2867 /* Got the lock; recheck whether request is satisfied */
2868 RefreshXLogWriteResult(LogwrtResult);
2869 if (record <= LogwrtResult.Flush)
2870 {
2871 LWLockRelease(WALWriteLock);
2872 break;
2873 }
2874
2875 /*
2876 * Sleep before flush! By adding a delay here, we may give further
2877 * backends the opportunity to join the backlog of group commit
2878 * followers; this can significantly improve transaction throughput,
2879 * at the risk of increasing transaction latency.
2880 *
2881 * We do not sleep if enableFsync is not turned on, nor if there are
2882 * fewer than CommitSiblings other backends with active transactions.
2883 */
2884 if (CommitDelay > 0 && enableFsync &&
2885 MinimumActiveBackends(CommitSiblings))
2886 {
2887 pg_usleep(CommitDelay);
2888
2889 /*
2890 * Re-check how far we can now flush the WAL. It's generally not
2891 * safe to call WaitXLogInsertionsToFinish while holding
2892 * WALWriteLock, because an in-progress insertion might need to
2893 * also grab WALWriteLock to make progress. But we know that all
2894 * the insertions up to insertpos have already finished, because
2895 * that's what the earlier WaitXLogInsertionsToFinish() returned.
2896 * We're only calling it again to allow insertpos to be moved
2897 * further forward, not to actually wait for anyone.
2898 */
2899 insertpos = WaitXLogInsertionsToFinish(insertpos);
2900 }
2901
2902 /* try to write/flush later additions to XLOG as well */
2903 WriteRqst.Write = insertpos;
2904 WriteRqst.Flush = insertpos;
2905
2906 XLogWrite(WriteRqst, insertTLI, false);
2907
2908 LWLockRelease(WALWriteLock);
2909 /* done */
2910 break;
2911 }
2912
2913 END_CRIT_SECTION();
2914
2915 /* wake up walsenders now that we've released heavily contended locks */
2916 WalSndWakeupProcessRequests(true, !RecoveryInProgress());
2917
2918 /*
2919 * If we still haven't flushed to the request point then we have a
2920 * problem; most likely, the requested flush point is past end of XLOG.
2921 * This has been seen to occur when a disk page has a corrupted LSN.
2922 *
2923 * Formerly we treated this as a PANIC condition, but that hurts the
2924 * system's robustness rather than helping it: we do not want to take down
2925 * the whole system due to corruption on one data page. In particular, if
2926 * the bad page is encountered again during recovery then we would be
2927 * unable to restart the database at all! (This scenario actually
2928 * happened in the field several times with 7.1 releases.) As of 8.4, bad
2929 * LSNs encountered during recovery are UpdateMinRecoveryPoint's problem;
2930 * the only time we can reach here during recovery is while flushing the
2931 * end-of-recovery checkpoint record, and we don't expect that to have a
2932 * bad LSN.
2933 *
2934 * Note that for calls from xact.c, the ERROR will be promoted to PANIC
2935 * since xact.c calls this routine inside a critical section. However,
2936 * calls from bufmgr.c are not within critical sections and so we will not
2937 * force a restart for a bad LSN on a data page.
2938 */
2939 if (LogwrtResult.Flush < record)
2940 elog(ERROR,
2941 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
2942 LSN_FORMAT_ARGS(record),
2943 LSN_FORMAT_ARGS(LogwrtResult.Flush));
2944
2945 /*
2946 * Cross-check XLogNeedsFlush(). Some of the checks of XLogFlush() and
2947 * XLogNeedsFlush() are duplicated, and this assertion ensures that these
2948 * remain consistent.
2949 */
2950 Assert(!XLogNeedsFlush(record));
2951}
bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1402
bool MinimumActiveBackends(int min)
Definition: procarray.c:3508
int CommitDelay
Definition: xlog.c:134
int CommitSiblings
Definition: xlog.c:135
bool XLogNeedsFlush(XLogRecPtr record)
Definition: xlog.c:3127
bool XLogInsertAllowed(void)
Definition: xlog.c:6461

References Assert(), CommitDelay, CommitSiblings, elog, enableFsync, END_CRIT_SECTION, ERROR, XLogwrtRqst::Flush, XLogwrtResult::Flush, XLogCtlData::info_lck, XLogCtlData::InsertTimeLineID, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, LW_EXCLUSIVE, LWLockAcquireOrWait(), LWLockRelease(), MinimumActiveBackends(), pg_usleep(), RecoveryInProgress(), RefreshXLogWriteResult, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, UpdateMinRecoveryPoint(), WaitXLogInsertionsToFinish(), WalSndWakeupProcessRequests(), XLogwrtRqst::Write, XLogwrtResult::Write, XLogCtl, XLogInsertAllowed(), XLogNeedsFlush(), and XLogWrite().

Referenced by CheckPointReplicationOrigin(), CreateCheckPoint(), CreateEndOfRecoveryRecord(), CreateOverwriteContrecordRecord(), dropdb(), EndPrepare(), FinishSyncWorker(), FlushBuffer(), LogLogicalMessage(), pg_truncate_visibility_map(), RecordTransactionAbortPrepared(), RecordTransactionCommit(), RecordTransactionCommitPrepared(), RelationTruncate(), ReplicationSlotReserveWal(), replorigin_get_progress(), replorigin_session_get_progress(), SlruPhysicalWritePage(), smgr_redo(), write_relmap_file(), WriteMTruncateXlogRec(), WriteTruncateXlogRec(), xact_redo_abort(), xact_redo_commit(), XLogInsertRecord(), and XLogReportParameters().

◆ XLogGetLastRemovedSegno()

XLogSegNo XLogGetLastRemovedSegno ( void  )

Definition at line 3777 of file xlog.c.

3778{
3779 XLogSegNo lastRemovedSegNo;
3780
3781 SpinLockAcquire(&XLogCtl->info_lck);
3782 lastRemovedSegNo = XLogCtl->lastRemovedSegNo;
3783 SpinLockRelease(&XLogCtl->info_lck);
3784
3785 return lastRemovedSegNo;
3786}

References XLogCtlData::info_lck, XLogCtlData::lastRemovedSegNo, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by copy_replication_slot(), GetWALAvailability(), ReplicationSlotReserveWal(), and reserve_wal_for_local_slot().

◆ XLogGetOldestSegno()

XLogSegNo XLogGetOldestSegno ( TimeLineID  tli)

Definition at line 3793 of file xlog.c.

3794{
3795 DIR *xldir;
3796 struct dirent *xlde;
3797 XLogSegNo oldest_segno = 0;
3798
3799 xldir = AllocateDir(XLOGDIR);
3800 while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
3801 {
3802 TimeLineID file_tli;
3803 XLogSegNo file_segno;
3804
3805 /* Ignore files that are not XLOG segments. */
3806 if (!IsXLogFileName(xlde->d_name))
3807 continue;
3808
3809 /* Parse filename to get TLI and segno. */
3810 XLogFromFileName(xlde->d_name, &file_tli, &file_segno,
3811 wal_segment_size);
3812
3813 /* Ignore anything that's not from the TLI of interest. */
3814 if (tli != file_tli)
3815 continue;
3816
3817 /* If it's the oldest so far, update oldest_segno. */
3818 if (oldest_segno == 0 || file_segno < oldest_segno)
3819 oldest_segno = file_segno;
3820 }
3821
3822 FreeDir(xldir);
3823 return oldest_segno;
3824}

References AllocateDir(), dirent::d_name, FreeDir(), IsXLogFileName(), ReadDir(), wal_segment_size, XLOGDIR, and XLogFromFileName().

Referenced by GetOldestUnsummarizedLSN(), MaybeRemoveOldWalSummaries(), and reserve_wal_for_local_slot().

◆ XLogGetReplicationSlotMinimumLSN()

static XLogRecPtr XLogGetReplicationSlotMinimumLSN ( void  )
static

Definition at line 2682 of file xlog.c.

2683{
2684 XLogRecPtr retval;
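2685
2686 SpinLockAcquire(&XLogCtl->info_lck);
2687 retval = XLogCtl->replicationSlotMinLSN;
2688 SpinLockRelease(&XLogCtl->info_lck);
2689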
2690 return retval;
2691}
XLogRecPtr replicationSlotMinLSN
Definition: xlog.c:460

References XLogCtlData::info_lck, XLogCtlData::replicationSlotMinLSN, SpinLockAcquire, SpinLockRelease, and XLogCtl.

Referenced by KeepLogSeg().

◆ XLogInitNewTimeline()

static void XLogInitNewTimeline ( TimeLineID  endTLI,
XLogRecPtr  endOfLog,
TimeLineID  newTLI 
)
static

Definition at line 5268 of file xlog.c.

5269{
5270 char xlogfname[MAXFNAMELEN];
5271 XLogSegNo endLogSegNo;
5272 XLogSegNo startLogSegNo;
5273
5274 /* we always switch to a new timeline after archive recovery */
5275 Assert(endTLI != newTLI);
5276
5277 /*
5278 * Update min recovery point one last time.
5279 */
5280 UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
5281
5282 /*
5283 * Calculate the last segment on the old timeline, and the first segment
5284 * on the new timeline. If the switch happens in the middle of a segment,
5285 * they are the same, but if the switch happens exactly at a segment
5286 * boundary, startLogSegNo will be endLogSegNo + 1.
5287 */
5288 XLByteToPrevSeg(endOfLog, endLogSegNo, wal_segment_size);
5289 XLByteToSeg(endOfLog, startLogSegNo, wal_segment_size);
5290
5291 /*
5292 * Initialize the starting WAL segment for the new timeline. If the switch
5293 * happens in the middle of a segment, copy data from the last WAL segment
5294 * of the old timeline up to the switch point, to the starting WAL segment
5295 * on the new timeline.
5296 */
5297 if (endLogSegNo == startLogSegNo)
5298 {
5299 /*
5300 * Make a copy of the file on the new timeline.
5301 *
5302 * Writing WAL isn't allowed yet, so there are no locking
5303 * considerations. But we should be just as tense as XLogFileInit to
5304 * avoid emplacing a bogus file.
5305 */
5306 XLogFileCopy(newTLI, endLogSegNo, endTLI, endLogSegNo,
5307 XLogSegmentOffset(endOfLog, wal_segment_size));
5308 }
5309 else
5310 {
5311 /*
5312 * The switch happened at a segment boundary, so just create the next
5313 * segment on the new timeline.
5314 */
5315 int fd;
5316
5317 fd = XLogFileInit(startLogSegNo, newTLI);
5318
5319 if (close(fd) != 0)
5320 {
5321 int save_errno = errno;
5322
5323 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5324 errno = save_errno;
5325 ereport(ERROR,
5326 (errcode_for_file_access(),
5327 errmsg("could not close file \"%s\": %m", xlogfname)));
5328 }
5329 }
5330
5331 /*
5332 * Let's just make real sure there are not .ready or .done flags posted
5333 * for the new segment.
5334 */
5335 XLogFileName(xlogfname, newTLI, startLogSegNo, wal_segment_size);
5336 XLogArchiveCleanup(xlogfname);
5337}
static void XLogFileCopy(TimeLineID destTLI, XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, int upto)
Definition: xlog.c:3437

References Assert(), close, ereport, errcode_for_file_access(), errmsg(), ERROR, fd(), InvalidXLogRecPtr, MAXFNAMELEN, UpdateMinRecoveryPoint(), wal_segment_size, XLByteToPrevSeg, XLByteToSeg, XLogArchiveCleanup(), XLogFileCopy(), XLogFileInit(), XLogFileName(), and XLogSegmentOffset.

Referenced by StartupXLOG().

◆ XLogInsertAllowed()

bool XLogInsertAllowed ( void  )

Definition at line 6461 of file xlog.c.

6462{
 /*
  * Report whether this process may insert WAL records.
  *
  * Returns the cached LocalXLogInsertAllowed value when it is >= 0,
  * false while RecoveryInProgress() reports true, and true otherwise.
  */
6463 /*
6464 * If value is "unconditionally true" or "unconditionally false", just
6465 * return it. This provides the normal fast path once recovery is known
6466 * done.
6467 */
6468 if (LocalXLogInsertAllowed >= 0)
6469 return (bool) LocalXLogInsertAllowed;
6470
6471 /*
6472 * Else, must check to see if we're still in recovery.
6473 */
6474 if (RecoveryInProgress())
6475 return false;
6476
6477 /*
6478 * On exit from recovery, reset to "unconditionally true", since there is
6479 * no need to keep checking.
6480 */
 /*
  * NOTE(review): listing line 6481 is absent from this rendering, so the
  * reset described above (presumably an assignment to
  * LocalXLogInsertAllowed) is not visible here -- confirm against the
  * real file.
  */
6482 return true;
6483}

References LocalXLogInsertAllowed, and RecoveryInProgress().

Referenced by XLogBeginInsert(), XLogFlush(), XLogInsertRecord(), and XLogNeedsFlush().

◆ XLogInsertRecord()

XLogRecPtr XLogInsertRecord ( XLogRecData rdata,
XLogRecPtr  fpw_lsn,
uint8  flags,
int  num_fpi,
uint64  fpi_bytes,
bool  topxid_included 
)

Definition at line 749 of file xlog.c.

755{
 /*
  * Insert an already-assembled WAL record, given as the chain starting at
  * rdata; the first chunk must hold the whole XLogRecord header.
  *
  * Returns InvalidXLogRecPtr when the caller has to rebuild the record
  * (full-page-write state or RedoRecPtr changed underneath it); otherwise
  * returns EndPos. Raises ERROR if XLogInsertAllowed() is false. On the
  * way out it sets ProcLastRecPtr and XactLastRecEnd and, if a record was
  * actually inserted, adds to the pgWalUsage counters.
  *
  * NOTE(review): this rendering drops a number of listing lines (for
  * example 756, 761, 771, 773, 821, 825, 858-859, 887, 892, 902, 924,
  * 952-956, 963, 970, 974-975, 1014, 1021, 1036 and 1084). The lock
  * acquisition/release calls and the declarations of "class", "Insert"
  * and "buf" that the visible code relies on are therefore not shown.
  */
757 pg_crc32c rdata_crc;
758 bool inserted;
759 XLogRecord *rechdr = (XLogRecord *) rdata->data;
760 uint8 info = rechdr->xl_info & ~XLR_INFO_MASK;
762 XLogRecPtr StartPos;
763 XLogRecPtr EndPos;
764 bool prevDoPageWrites = doPageWrites;
765 TimeLineID insertTLI;
766
767 /* Does this record type require special handling? */
768 if (unlikely(rechdr->xl_rmid == RM_XLOG_ID))
769 {
770 if (info == XLOG_SWITCH)
772 else if (info == XLOG_CHECKPOINT_REDO)
774 }
775
776 /* we assume that all of the record header is in the first chunk */
777 Assert(rdata->len >= SizeOfXLogRecord);
778
779 /* cross-check on whether we should be here or not */
780 if (!XLogInsertAllowed())
781 elog(ERROR, "cannot make new WAL entries during recovery");
782
783 /*
784 * Given that we're not in recovery, InsertTimeLineID is set and can't
785 * change, so we can read it without a lock.
786 */
787 insertTLI = XLogCtl->InsertTimeLineID;
788
789 /*----------
790 *
791 * We have now done all the preparatory work we can without holding a
792 * lock or modifying shared state. From here on, inserting the new WAL
793 * record to the shared WAL buffer cache is a two-step process:
794 *
795 * 1. Reserve the right amount of space from the WAL. The current head of
796 * reserved space is kept in Insert->CurrBytePos, and is protected by
797 * insertpos_lck.
798 *
799 * 2. Copy the record to the reserved WAL space. This involves finding the
800 * correct WAL buffer containing the reserved space, and copying the
801 * record in place. This can be done concurrently in multiple processes.
802 *
803 * To keep track of which insertions are still in-progress, each concurrent
804 * inserter acquires an insertion lock. In addition to just indicating that
805 * an insertion is in progress, the lock tells others how far the inserter
806 * has progressed. There is a small fixed number of insertion locks,
807 * determined by NUM_XLOGINSERT_LOCKS. When an inserter crosses a page
808 * boundary, it updates the value stored in the lock to how far it has
809 * inserted, to allow the previous buffer to be flushed.
810 *
811 * Holding onto an insertion lock also protects RedoRecPtr and
812 * fullPageWrites from changing until the insertion is finished.
813 *
814 * Step 2 can usually be done completely in parallel. If the required WAL
815 * page is not initialized yet, you have to grab WALBufMappingLock to
816 * initialize it, but the WAL writer tries to do that ahead of insertions
817 * to keep that out of the critical path.
818 *
819 *----------
820 */
822
823 if (likely(class == WALINSERT_NORMAL))
824 {
826
827 /*
828 * Check to see if my copy of RedoRecPtr is out of date. If so, may
829 * have to go back and have the caller recompute everything. This can
830 * only happen just after a checkpoint, so it's better to be slow in
831 * this case and fast otherwise.
832 *
833 * Also check to see if fullPageWrites was just turned on or there's a
834 * running backup (which forces full-page writes); if we weren't
835 * already doing full-page writes then go back and recompute.
836 *
837 * If we aren't doing full-page writes then RedoRecPtr doesn't
838 * actually affect the contents of the XLOG record, so we'll update
839 * our local copy but not force a recomputation. (If doPageWrites was
840 * just turned off, we could recompute the record without full pages,
841 * but we choose not to bother.)
842 */
843 if (RedoRecPtr != Insert->RedoRecPtr)
844 {
845 Assert(RedoRecPtr < Insert->RedoRecPtr);
846 RedoRecPtr = Insert->RedoRecPtr;
847 }
848 doPageWrites = (Insert->fullPageWrites || Insert->runningBackups > 0);
849
850 if (doPageWrites &&
851 (!prevDoPageWrites ||
852 (XLogRecPtrIsValid(fpw_lsn) && fpw_lsn <= RedoRecPtr)))
853 {
854 /*
855 * Oops, some buffer now needs to be backed up that the caller
856 * didn't back up. Start over.
857 */
860 return InvalidXLogRecPtr;
861 }
862
863 /*
864 * Reserve space for the record in the WAL. This also sets the xl_prev
865 * pointer.
866 */
867 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
868 &rechdr->xl_prev);
869
870 /* Normal records are always inserted. */
871 inserted = true;
872 }
873 else if (class == WALINSERT_SPECIAL_SWITCH)
874 {
875 /*
876 * In order to insert an XLOG_SWITCH record, we need to hold all of
877 * the WAL insertion locks, not just one, so that no one else can
878 * begin inserting a record until we've figured out how much space
879 * remains in the current WAL segment and claimed all of it.
880 *
881 * Nonetheless, this case is simpler than the normal case handled
882 * above, which must check for changes in doPageWrites and RedoRecPtr.
883 * Those checks are only needed for records that can contain buffer
884 * references, and an XLOG_SWITCH record never does.
885 */
886 Assert(!XLogRecPtrIsValid(fpw_lsn));
888 inserted = ReserveXLogSwitch(&StartPos, &EndPos, &rechdr->xl_prev);
889 }
890 else
891 {
893
894 /*
895 * We need to update both the local and shared copies of RedoRecPtr,
896 * which means that we need to hold all the WAL insertion locks.
897 * However, there can't be any buffer references, so as above, we need
898 * not check RedoRecPtr before inserting the record; we just need to
899 * update it afterwards.
900 */
901 Assert(!XLogRecPtrIsValid(fpw_lsn));
903 ReserveXLogInsertLocation(rechdr->xl_tot_len, &StartPos, &EndPos,
904 &rechdr->xl_prev);
905 RedoRecPtr = Insert->RedoRecPtr = StartPos;
906 inserted = true;
907 }
908
909 if (inserted)
910 {
911 /*
912 * Now that xl_prev has been filled in, calculate CRC of the record
913 * header.
914 */
915 rdata_crc = rechdr->xl_crc;
916 COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
917 FIN_CRC32C(rdata_crc);
918 rechdr->xl_crc = rdata_crc;
919
920 /*
921 * All the record data, including the header, is now ready to be
922 * inserted. Copy the record in the space reserved.
923 */
925 class == WALINSERT_SPECIAL_SWITCH, rdata,
926 StartPos, EndPos, insertTLI);
927
928 /*
929 * Unless record is flagged as not important, update LSN of last
930 * important record in the current slot. When holding all locks, just
931 * update the first one.
932 */
933 if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
934 {
935 int lockno = holdingAllLocks ? 0 : MyLockNo;
936
937 WALInsertLocks[lockno].l.lastImportantAt = StartPos;
938 }
939 }
940 else
941 {
942 /*
943 * This was an xlog-switch record, but the current insert location was
944 * already exactly at the beginning of a segment, so there was no need
945 * to do anything.
946 */
947 }
948
949 /*
950 * Done! Let others know that we're finished.
951 */
953
955
957
958 /*
959 * Mark the top transaction id as logged (if needed), so that we do not
960 * try to log it again with the next WAL record in the current subtransaction.
961 */
962 if (topxid_included)
964
965 /*
966 * Update shared LogwrtRqst.Write, if we crossed page boundary.
967 */
968 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
969 {
971 /* advance global request to include new block(s) */
972 if (XLogCtl->LogwrtRqst.Write < EndPos)
973 XLogCtl->LogwrtRqst.Write = EndPos;
976 }
977
978 /*
979 * If this was an XLOG_SWITCH record, flush the record and the empty
980 * padding space that fills the rest of the segment, and perform
981 * end-of-segment actions (eg, notifying archiver).
982 */
983 if (class == WALINSERT_SPECIAL_SWITCH)
984 {
985 TRACE_POSTGRESQL_WAL_SWITCH();
986 XLogFlush(EndPos);
987
988 /*
989 * Even though we reserved the rest of the segment for us, which is
990 * reflected in EndPos, we return a pointer to just the end of the
991 * xlog-switch record.
992 */
993 if (inserted)
994 {
995 EndPos = StartPos + SizeOfXLogRecord;
 /*
  * If the record header itself spills onto the next page, step over
  * that page's header too. The segment offset equals the in-page
  * offset only when EndPos lies in the first page of its segment
  * (assuming wal_segment_size is a multiple of XLOG_BLCKSZ); that
  * page gets the long header size, any other page the short one.
  */
996 if (StartPos / XLOG_BLCKSZ != EndPos / XLOG_BLCKSZ)
997 {
998 uint64 offset = XLogSegmentOffset(EndPos, wal_segment_size);
999
1000 if (offset == EndPos % XLOG_BLCKSZ)
1001 EndPos += SizeOfXLogLongPHD;
1002 else
1003 EndPos += SizeOfXLogShortPHD;
1004 }
1005 }
1006 }
1007
1008#ifdef WAL_DEBUG
1009 if (XLOG_DEBUG)
1010 {
1011 static XLogReaderState *debug_reader = NULL;
1012 XLogRecord *record;
1013 DecodedXLogRecord *decoded;
1015 StringInfoData recordBuf;
1016 char *errormsg = NULL;
1017 MemoryContext oldCxt;
1018
 /* Do the debug allocations in walDebugCxt; the old context is restored at the end. */
1019 oldCxt = MemoryContextSwitchTo(walDebugCxt);
1020
1022 appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
1023
1024 /*
1025 * We have to piece together the WAL record data from the XLogRecData
1026 * entries, so that we can pass it to the rm_desc function as one
1027 * contiguous chunk.
1028 */
1029 initStringInfo(&recordBuf);
1030 for (; rdata != NULL; rdata = rdata->next)
1031 appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
1032
1033 /* We also need temporary space to decode the record. */
1034 record = (XLogRecord *) recordBuf.data;
1035 decoded = (DecodedXLogRecord *)
1037
 /* The reader is static, so it is allocated at most once per process. */
1038 if (!debug_reader)
1039 debug_reader = XLogReaderAllocate(wal_segment_size, NULL,
1040 XL_ROUTINE(.page_read = NULL,
1041 .segment_open = NULL,
1042 .segment_close = NULL),
1043 NULL);
1044 if (!debug_reader)
1045 {
1046 appendStringInfoString(&buf, "error decoding record: out of memory while allocating a WAL reading processor");
1047 }
1048 else if (!DecodeXLogRecord(debug_reader,
1049 decoded,
1050 record,
1051 EndPos,
1052 &errormsg))
1053 {
1054 appendStringInfo(&buf, "error decoding record: %s",
1055 errormsg ? errormsg : "no error message");
1056 }
1057 else
1058 {
1059 appendStringInfoString(&buf, " - ");
1060
 /* Attach the decoded record only for the duration of the describe call. */
1061 debug_reader->record = decoded;
1062 xlog_outdesc(&buf, debug_reader);
1063 debug_reader->record = NULL;
1064 }
1065 elog(LOG, "%s", buf.data);
1066
1067 pfree(decoded);
1068 pfree(buf.data);
1069 pfree(recordBuf.data);
1070 MemoryContextSwitchTo(oldCxt);
1071 }
1072#endif
1073
1074 /*
1075 * Update our global variables
1076 */
1077 ProcLastRecPtr = StartPos;
1078 XactLastRecEnd = EndPos;
1079
1080 /* Report WAL traffic to the instrumentation. */
1081 if (inserted)
1082 {
1083 pgWalUsage.wal_bytes += rechdr->xl_tot_len;
1085 pgWalUsage.wal_fpi += num_fpi;
1086 pgWalUsage.wal_fpi_bytes += fpi_bytes;
1087
1088 /* Required for the flush of pending stats WAL data */
1089 pgstat_report_fixed = true;
1090 }
1091
1092 return EndPos;
1093}
#define likely(x)
Definition: c.h:406
#define unlikely(x)
Definition: c.h:407
if(TABLE==NULL||TABLE_index==NULL)
Definition: isn.c:81
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
void appendBinaryStringInfo(StringInfo str, const void *data, int datalen)
Definition: stringinfo.c:281
void appendStringInfoString(StringInfo str, const char *s)
Definition: stringinfo.c:230
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
uint64 wal_fpi_bytes
Definition: instrument.h:56
int64 wal_records
Definition: instrument.h:53
DecodedXLogRecord * record
Definition: xlogreader.h:235
pg_crc32c xl_crc
Definition: xlogrecord.h:49
void MarkSubxactTopXidLogged(void)
Definition: xact.c:592
void MarkCurrentTransactionIdLoggedIfAny(void)
Definition: xact.c:542
XLogRecPtr XactLastRecEnd
Definition: xlog.c:256
static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata, XLogRecPtr StartPos, XLogRecPtr EndPos, TimeLineID tli)
Definition: xlog.c:1231
static void ReserveXLogInsertLocation(int size, XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1114
static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
Definition: xlog.c:1170
XLogReaderState * XLogReaderAllocate(int wal_segment_size, const char *waldir, XLogReaderRoutine *routine, void *private_data)
Definition: xlogreader.c:107
bool DecodeXLogRecord(XLogReaderState *state, DecodedXLogRecord *decoded, XLogRecord *record, XLogRecPtr lsn, char **errormsg)
Definition: xlogreader.c:1682
size_t DecodeXLogRecordRequiredSpace(size_t xl_tot_len)
Definition: xlogreader.c:1649
#define XL_ROUTINE(...)
Definition: xlogreader.h:117
#define XLR_INFO_MASK
Definition: xlogrecord.h:62
void xlog_outdesc(StringInfo buf, XLogReaderState *record)

References appendBinaryStringInfo(), appendStringInfo(), appendStringInfoString(), Assert(), buf, COMP_CRC32C, CopyXLogRecordToWAL(), XLogRecData::data, StringInfoData::data, DecodeXLogRecord(), DecodeXLogRecordRequiredSpace(), doPageWrites, elog, END_CRIT_SECTION, ERROR, FIN_CRC32C, holdingAllLocks, if(), XLogCtlData::info_lck, initStringInfo(), XLogCtlData::Insert, Insert(), XLogCtlData::InsertTimeLineID, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogRecData::len, likely, LOG, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MarkCurrentTransactionIdLoggedIfAny(), MarkSubxactTopXidLogged(), MemoryContextSwitchTo(), MyLockNo, XLogRecData::next, palloc(), pfree(), pgstat_report_fixed, pgWalUsage, ProcLastRecPtr, XLogReaderState::record, RedoRecPtr, RefreshXLogWriteResult, ReserveXLogInsertLocation(), ReserveXLogSwitch(), SizeOfXLogLongPHD, SizeOfXLogRecord, SizeOfXLogShortPHD, SpinLockAcquire, SpinLockRelease, START_CRIT_SECTION, unlikely, WalUsage::wal_bytes, WalUsage::wal_fpi, WalUsage::wal_fpi_bytes, WalUsage::wal_records, wal_segment_size, WALINSERT_NORMAL, WALINSERT_SPECIAL_CHECKPOINT, WALINSERT_SPECIAL_SWITCH, WALInsertLockAcquire(), WALInsertLockAcquireExclusive(), WALInsertLockRelease(), WALInsertLocks, XLogwrtRqst::Write, XactLastRecEnd, XLogRecord::xl_crc, XLogRecord::xl_info, XLogRecord::xl_prev, XLogRecord::xl_rmid, XL_ROUTINE, XLogRecord::xl_tot_len, XLOG_CHECKPOINT_REDO, XLOG_MARK_UNIMPORTANT, xlog_outdesc(), XLOG_SWITCH, XLogCtl, XLogFlush(), XLogInsertAllowed(), XLogReaderAllocate(), XLogRecPtrIsValid, XLogSegmentOffset, and XLR_INFO_MASK.

Referenced by XLogInsert().

◆ XLogNeedsFlush()

bool XLogNeedsFlush ( XLogRecPtr  record)

Definition at line 3127 of file xlog.c.

3128{
 /*
  * Report whether the given WAL position still needs flushing.
  *
  * When XLogInsertAllowed() is false, the question becomes whether
  * minRecoveryPoint would need updating (see the comment below). In that
  * mode the function answers true, as a conservative guess, whenever
  * ControlFileLock cannot be taken immediately. Otherwise the answer comes
  * from comparing "record" against LogwrtResult.Flush.
  *
  * NOTE(review): listing lines 3141, 3152, 3164-3165, 3173, 3177 and 3188
  * are absent from this rendering, so several conditions and assignments
  * that guard the visible statements are not shown.
  */
3129 /*
3130 * During recovery, we don't flush WAL but update minRecoveryPoint
3131 * instead. So "needs flush" is taken to mean whether minRecoveryPoint
3132 * would need to be updated.
3133 *
3134 * Using XLogInsertAllowed() rather than RecoveryInProgress() matters for
3135 * the case of an end-of-recovery checkpoint, where WAL data is flushed.
3136 * This check should be consistent with the one in XLogFlush().
3137 */
3138 if (!XLogInsertAllowed())
3139 {
3140 /* Quick exit if already known to be updated or cannot be updated */
3142 return false;
3143
3144 /*
3145 * An invalid minRecoveryPoint means that we need to recover all the
3146 * WAL, i.e., we're doing crash recovery. We never modify the control
3147 * file's value in that case, so we can short-circuit future checks
3148 * here too. This triggers a quick exit path for the startup process,
3149 * which cannot update its local copy of minRecoveryPoint as long as
3150 * it has not replayed all WAL available when doing crash recovery.
3151 */
3153 {
3154 updateMinRecoveryPoint = false;
3155 return false;
3156 }
3157
3158 /*
3159 * Update local copy of minRecoveryPoint. But if the lock is busy,
3160 * just return a conservative guess.
3161 */
3162 if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
3163 return true;
3166 LWLockRelease(ControlFileLock);
3167
3168 /*
3169 * Check minRecoveryPoint for any other process than the startup
3170 * process doing crash recovery, which should not update the control
3171 * file value if crash recovery is still running.
3172 */
3174 updateMinRecoveryPoint = false;
3175
3176 /* check again */
3178 return false;
3179 else
3180 return true;
3181 }
3182
3183 /* Quick exit if already known flushed */
3184 if (record <= LogwrtResult.Flush)
3185 return false;
3186
3187 /* read LogwrtResult and update local state */
3189
3190 /* check again */
3191 if (record <= LogwrtResult.Flush)
3192 return false;
3193
3194 return true;
3195}
bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1345

References ControlFile, XLogwrtResult::Flush, InRecovery, LocalMinRecoveryPoint, LocalMinRecoveryPointTLI, LogwrtResult, LW_SHARED, LWLockConditionalAcquire(), LWLockRelease(), ControlFileData::minRecoveryPoint, ControlFileData::minRecoveryPointTLI, RefreshXLogWriteResult, updateMinRecoveryPoint, XLogInsertAllowed(), and XLogRecPtrIsValid.

Referenced by GetVictimBuffer(), SetHintBits(), and XLogFlush().

◆ XLogPutNextOid()

void XLogPutNextOid ( Oid  nextOid)

Definition at line 8093 of file xlog.c.

8094{
 /*
  * Write an XLOG_NEXTOID record (resource manager RM_XLOG_ID) whose
  * payload is the nextOid value. The LSN returned by XLogInsert() is
  * discarded on purpose, and no flush is requested; the long comment
  * below explains why that is safe.
  *
  * NOTE(review): listing line 8095 is absent from this rendering; the
  * reference list for this function names XLogBeginInsert(), which is
  * presumably the call that belongs there.
  */
8096 XLogRegisterData(&nextOid, sizeof(Oid));
8097 (void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
8098
8099 /*
8100 * We need not flush the NEXTOID record immediately, because any of the
8101 * just-allocated OIDs could only reach disk as part of a tuple insert or
8102 * update that would have its own XLOG record that must follow the NEXTOID
8103 * record. Therefore, the standard buffer LSN interlock applied to those
8104 * records will ensure no such OID reaches disk before the NEXTOID record
8105 * does.
8106 *
8107 * Note, however, that the above statement only covers state "within" the
8108 * database. When we use a generated OID as a file or directory name, we
8109 * are in a sense violating the basic WAL rule, because that filesystem
8110 * change may reach disk before the NEXTOID WAL record does. The impact
8111 * of this is that if a database crash occurs immediately afterward, we
8112 * might after restart re-generate the same OID and find that it conflicts
8113 * with the leftover file or directory. But since for safety's sake we
8114 * always loop until finding a nonconflicting filename, this poses no real
8115 * problem in practice. See pgsql-hackers discussion 27-Sep-2006.
8116 */
8117}

References XLOG_NEXTOID, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by GetNewObjectId().

◆ XLogRecPtrToBytePos()

static uint64 XLogRecPtrToBytePos ( XLogRecPtr  ptr)
static

Definition at line 1947 of file xlog.c.

1948{
 /*
  * Convert a WAL pointer into a byte count that leaves page headers out.
  *
  * Each whole segment before ptr contributes UsableBytesInSegment. Within
  * ptr's own segment, the first page is charged SizeOfXLogLongPHD of
  * header and every later page SizeOfXLogShortPHD. An in-page offset of
  * zero means ptr sits exactly on a page boundary, so nothing is added for
  * that page.
  */
1949 uint64 fullsegs;
1950 uint32 fullpages;
1951 uint32 offset;
1952 uint64 result;
1953
1954 XLByteToSeg(ptr, fullsegs, wal_segment_size);
1955
1956 fullpages = (XLogSegmentOffset(ptr, wal_segment_size)) / XLOG_BLCKSZ; /* whole pages before ptr in its segment */
1957 offset = ptr % XLOG_BLCKSZ; /* position inside the current page */
1958
1959 if (fullpages == 0)
1960 {
 /* ptr is in the first page of the segment, which has the long header */
1961 result = fullsegs * UsableBytesInSegment;
1962 if (offset > 0)
1963 {
 /* a nonzero offset must already be past the page header */
1964 Assert(offset >= SizeOfXLogLongPHD);
1965 result += offset - SizeOfXLogLongPHD;
1966 }
1967 }
1968 else
1969 {
1970 result = fullsegs * UsableBytesInSegment +
1971 (XLOG_BLCKSZ - SizeOfXLogLongPHD) + /* account for first page */
1972 (fullpages - 1) * UsableBytesInPage; /* full pages */
1973 if (offset > 0)
1974 {
 /* pages after the first use the short header */
1975 Assert(offset >= SizeOfXLogShortPHD);
1976 result += offset - SizeOfXLogShortPHD;
1977 }
1978 }
1979
1980 return result;
1981}

References Assert(), SizeOfXLogLongPHD, SizeOfXLogShortPHD, UsableBytesInPage, UsableBytesInSegment, wal_segment_size, XLByteToSeg, and XLogSegmentOffset.

Referenced by ReserveXLogInsertLocation(), ReserveXLogSwitch(), and StartupXLOG().

◆ XLogReportParameters()

static void XLogReportParameters ( void  )
static

Definition at line 8173 of file xlog.c.

8174{
 /*
  * From what is visible here: inside an outer condition, and under a
  * further inner condition, build an xl_parameter_change record, insert it
  * as XLOG_PARAMETER_CHANGE and flush up to the returned position. Then
  * take ControlFileLock exclusively, do some work, and release it.
  *
  * NOTE(review): most of this listing is absent from the rendering (lines
  * 8175-8182, 8191, 8196-8200, 8202-8203, 8205 and 8214-8222). Both
  * conditions, the remaining xlrec assignments and everything done under
  * the lock are not shown. The reference list for this function names
  * XLogIsNeeded, XLogBeginInsert() and UpdateControlFile(), which
  * presumably belong in those gaps -- confirm against the real file.
  */
8183 {
8184 /*
8185 * The change in number of backend slots doesn't need to be WAL-logged
8186 * if archiving is not enabled, as you can't start archive recovery
8187 * with wal_level=minimal anyway. We don't really care about the
8188 * values in pg_control either if wal_level=minimal, but seems better
8189 * to keep them up-to-date to avoid confusion.
8190 */
8192 {
8193 xl_parameter_change xlrec;
8194 XLogRecPtr recptr;
8195
8201 xlrec.wal_level = wal_level;
8204
8206 XLogRegisterData(&xlrec, sizeof(xlrec));
8207
8208 recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
 /* unlike XLogPutNextOid(), this record is flushed right away */
8209 XLogFlush(recptr);
8210 }
8211
8212 LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
8213
8223
8224 LWLockRelease(ControlFileLock);
8225 }
8226}

References ControlFile, LW_EXCLUSIVE, LWLockAcquire(), LWLockRelease(), max_locks_per_xact, xl_parameter_change::max_locks_per_xact, ControlFileData::max_locks_per_xact, max_prepared_xacts, xl_parameter_change::max_prepared_xacts, ControlFileData::max_prepared_xacts, max_wal_senders, xl_parameter_change::max_wal_senders, ControlFileData::max_wal_senders, max_worker_processes, xl_parameter_change::max_worker_processes, ControlFileData::max_worker_processes, MaxConnections, xl_parameter_change::MaxConnections, ControlFileData::MaxConnections, track_commit_timestamp, xl_parameter_change::track_commit_timestamp, ControlFileData::track_commit_timestamp, UpdateControlFile(), wal_level, xl_parameter_change::wal_level, ControlFileData::wal_level, wal_log_hints, xl_parameter_change::wal_log_hints, ControlFileData::wal_log_hints, XLOG_PARAMETER_CHANGE, XLogBeginInsert(), XLogFlush(), XLogInsert(), XLogIsNeeded, and XLogRegisterData().

Referenced by StartupXLOG().

◆ XLogRestorePoint()

XLogRecPtr XLogRestorePoint ( const char *  rpName)

Definition at line 8148 of file xlog.c.

8149{
 /*
  * Write an XLOG_RESTORE_POINT record carrying the current timestamp and
  * the given name, emit a LOG message naming the restore point and its
  * LSN, and return the LSN that XLogInsert() handed back.
  *
  * rpName is copied with strlcpy() bounded by MAXFNAMELEN, the size of
  * xl_restore_point.rp_name, so a longer name is truncated in the record.
  * The LOG message still prints the caller's original string.
  *
  * NOTE(review): listing line 8156 is absent from this rendering; the
  * reference list names XLogBeginInsert(), presumably the missing call.
  */
8150 XLogRecPtr RecPtr;
8151 xl_restore_point xlrec;
8152
8153 xlrec.rp_time = GetCurrentTimestamp();
8154 strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
8155
8157 XLogRegisterData(&xlrec, sizeof(xl_restore_point));
8158
8159 RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
8160
8161 ereport(LOG,
8162 errmsg("restore point \"%s\" created at %X/%08X",
8163 rpName, LSN_FORMAT_ARGS(RecPtr)));
8164
8165 return RecPtr;
8166}
char rp_name[MAXFNAMELEN]
TimestampTz rp_time

References ereport, errmsg(), GetCurrentTimestamp(), LOG, LSN_FORMAT_ARGS, MAXFNAMELEN, xl_restore_point::rp_name, xl_restore_point::rp_time, strlcpy(), XLOG_RESTORE_POINT, XLogBeginInsert(), XLogInsert(), and XLogRegisterData().

Referenced by pg_create_restore_point().

◆ XLogSetAsyncXactLSN()

void XLogSetAsyncXactLSN ( XLogRecPtr  asyncXactLSN)

Definition at line 2612 of file xlog.c.

2613{
 /*
  * Record asyncXactLSN in XLogCtl->asyncXactLSN if it is newer than the
  * stored value, then decide whether to wake the WAL writer.
  *
  * Returns early when the given LSN does not advance the stored one. The
  * WAL writer's latch is set when it was sleeping, when
  * WalWriterFlushAfter is 0, or when at least WalWriterFlushAfter blocks
  * lie between LogwrtResult.Flush and the requested LSN.
  *
  * NOTE(review): listing lines 2619, 2624 and 2645 are absent from this
  * rendering. The reference list names SpinLockAcquire/SpinLockRelease on
  * info_lck and RefreshXLogWriteResult, which presumably belong there.
  */
2614 XLogRecPtr WriteRqstPtr = asyncXactLSN;
2615 bool sleeping;
2616 bool wakeup = false;
2617 XLogRecPtr prevAsyncXactLSN;
2618
 /* copy the shared fields into locals; the decisions below use only the copies */
2620 sleeping = XLogCtl->WalWriterSleeping;
2621 prevAsyncXactLSN = XLogCtl->asyncXactLSN;
2622 if (XLogCtl->asyncXactLSN < asyncXactLSN)
2623 XLogCtl->asyncXactLSN = asyncXactLSN;
2625
2626 /*
2627 * If somebody else already called this function with a more aggressive
2628 * LSN, they will have done what we needed (and perhaps more).
2629 */
2630 if (asyncXactLSN <= prevAsyncXactLSN)
2631 return;
2632
2633 /*
2634 * If the WALWriter is sleeping, kick it to make it come out of low-power
2635 * mode, so that this async commit will reach disk within the expected
2636 * amount of time. Otherwise, determine whether it has enough WAL
2637 * available to flush, the same way that XLogBackgroundFlush() does.
2638 */
2639 if (sleeping)
2640 wakeup = true;
2641 else
2642 {
2643 int flushblocks;
2644
2646
 /* number of XLOG_BLCKSZ blocks between the flushed position and the request */
2647 flushblocks =
2648 WriteRqstPtr / XLOG_BLCKSZ - LogwrtResult.Flush / XLOG_BLCKSZ;
2649
2650 if (WalWriterFlushAfter == 0 || flushblocks >= WalWriterFlushAfter)
2651 wakeup = true;
2652 }
2653
2654 if (wakeup)
2655 {
2656 volatile PROC_HDR *procglobal = ProcGlobal;
2657 ProcNumber walwriterProc = procglobal->walwriterProc;
2658
 /* nothing to wake if no WAL writer process is registered */
2659 if (walwriterProc != INVALID_PROC_NUMBER)
2660 SetLatch(&GetPGProcByNumber(walwriterProc)->procLatch);
2661 }
2662}
void SetLatch(Latch *latch)
Definition: latch.c:290
#define GetPGProcByNumber(n)
Definition: proc.h:440
#define INVALID_PROC_NUMBER
Definition: procnumber.h:26
int ProcNumber
Definition: procnumber.h:24
PROC_HDR * ProcGlobal
Definition: proc.c:79
Definition: proc.h:386
ProcNumber walwriterProc
Definition: proc.h:424
static TimestampTz wakeup[NUM_WALRCV_WAKEUPS]
Definition: walreceiver.c:130

References XLogCtlData::asyncXactLSN, XLogwrtResult::Flush, GetPGProcByNumber, XLogCtlData::info_lck, INVALID_PROC_NUMBER, LogwrtResult, ProcGlobal, RefreshXLogWriteResult, SetLatch(), SpinLockAcquire, SpinLockRelease, wakeup, WalWriterFlushAfter, PROC_HDR::walwriterProc, XLogCtlData::WalWriterSleeping, and XLogCtl.

Referenced by AbortTransaction(), LogCurrentRunningXacts(), RecordTransactionAbort(), and RecordTransactionCommit().

◆ XLogSetReplicationSlotMinimumLSN()

void XLogSetReplicationSlotMinimumLSN ( XLogRecPtr  lsn)

◆ XLOGShmemInit()

void XLOGShmemInit ( void  )

Definition at line 4974 of file xlog.c.

4975{
 /*
  * Create or attach to the "XLOG Ctl" and "Control File" shared memory
  * structures.
  *
  * If they already exist, the function only does the attach-time setup and
  * returns. Otherwise it zeroes XLogCtl, copies any locally read control
  * file into shared memory, and carves the space after XLogCtlData into,
  * in order: the xlblocks array, the WAL insertion locks (aligned to their
  * padded size), and the WAL page buffers (aligned to XLOG_BLCKSZ). In both
  * paths the local control-file copy, if any, is freed.
  *
  * NOTE(review): this rendering drops a number of listing lines (4991,
  * 4993, 5003, 5012, 5041, 5047, 5054-5055, 5071-5073 and 5076-5081). The
  * assignment of ControlFile, the per-buffer and per-lock initialisation,
  * and most of the final XLogCtl field setup are therefore not shown.
  */
4976 bool foundCFile,
4977 foundXLog;
4978 char *allocptr;
4979 int i;
4980 ControlFileData *localControlFile;
4981
4982#ifdef WAL_DEBUG
4983
4984 /*
4985 * Create a memory context for WAL debugging that's exempt from the normal
4986 * "no pallocs in critical section" rule. Yes, that can lead to a PANIC if
4987 * an allocation fails, but wal_debug is not for production use anyway.
4988 */
4989 if (walDebugCxt == NULL)
4990 {
4992 "WAL Debug",
4994 MemoryContextAllowInCriticalSection(walDebugCxt, true);
4995 }
4996#endif
4997
4998
4999 XLogCtl = (XLogCtlData *)
5000 ShmemInitStruct("XLOG Ctl", XLOGShmemSize(), &foundXLog);
5001
 /* remember the process-local control file before ControlFile is repointed */
5002 localControlFile = ControlFile;
5004 ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
5005
5006 if (foundCFile || foundXLog)
5007 {
5008 /* both should be present or neither */
5009 Assert(foundCFile && foundXLog);
5010
5011 /* Initialize local copy of WALInsertLocks */
5013
5014 if (localControlFile)
5015 pfree(localControlFile);
5016 return;
5017 }
5018 memset(XLogCtl, 0, sizeof(XLogCtlData));
5019
5020 /*
5021 * Already have read control file locally, unless in bootstrap mode. Move
5022 * contents into shared memory.
5023 */
5024 if (localControlFile)
5025 {
5026 memcpy(ControlFile, localControlFile, sizeof(ControlFileData));
5027 pfree(localControlFile);
5028 }
5029
5030 /*
5031 * Since XLogCtlData contains XLogRecPtr fields, its sizeof should be a
5032 * multiple of the alignment for same, so no extra alignment padding is
5033 * needed here.
5034 */
5035 allocptr = ((char *) XLogCtl) + sizeof(XLogCtlData);
5036 XLogCtl->xlblocks = (pg_atomic_uint64 *) allocptr;
5037 allocptr += sizeof(pg_atomic_uint64) * XLOGbuffers;
5038
5039 for (i = 0; i < XLOGbuffers; i++)
5040 {
5042 }
5043
5044 /* WAL insertion locks. Ensure they're aligned to the full padded size */
 /*
  * This always moves allocptr forward. If it was already aligned, it
  * skips one whole sizeof(WALInsertLockPadded). XLOGShmemSize() budgets
  * NUM_XLOGINSERT_LOCKS + 1 padded entries, which covers that case.
  */
5045 allocptr += sizeof(WALInsertLockPadded) -
5046 ((uintptr_t) allocptr) % sizeof(WALInsertLockPadded);
5048 (WALInsertLockPadded *) allocptr;
5049 allocptr += sizeof(WALInsertLockPadded) * NUM_XLOGINSERT_LOCKS;
5050
5051 for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
5052 {
5053 LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
5056 }
5057
5058 /*
5059 * Align the start of the page buffers to a full xlog block size boundary.
5060 * This simplifies some calculations in XLOG insertion. It is also
5061 * required for O_DIRECT.
5062 */
5063 allocptr = (char *) TYPEALIGN(XLOG_BLCKSZ, allocptr);
5064 XLogCtl->pages = allocptr;
5065 memset(XLogCtl->pages, 0, (Size) XLOG_BLCKSZ * XLOGbuffers);
5066
5067 /*
5068 * Do basic initialization of XLogCtl shared data. (StartupXLOG will fill
5069 * in additional info.)
5070 */
5074 XLogCtl->WalWriterSleeping = false;
5075
5082}
static void pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
Definition: atomics.h:451
struct pg_atomic_uint64 pg_atomic_uint64
void LWLockInitialize(LWLock *lock, int tranche_id)
Definition: lwlock.c:698
MemoryContext TopMemoryContext
Definition: mcxt.c:166
void MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
Definition: mcxt.c:740
#define AllocSetContextCreate
Definition: memutils.h:129
#define ALLOCSET_DEFAULT_SIZES
Definition: memutils.h:160
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:388
#define SpinLockInit(lock)
Definition: spin.h:57
int XLogCacheBlck
Definition: xlog.c:495
WALInsertLockPadded * WALInsertLocks
Definition: xlog.c:446
slock_t insertpos_lck
Definition: xlog.c:400
union WALInsertLockPadded WALInsertLockPadded
Size XLOGShmemSize(void)
Definition: xlog.c:4924
struct XLogCtlData XLogCtlData

References ALLOCSET_DEFAULT_SIZES, AllocSetContextCreate, Assert(), ControlFile, i, XLogCtlData::info_lck, XLogCtlData::Insert, XLogCtlInsert::insertpos_lck, XLogCtlData::InstallXLogFileSegmentActive, InvalidXLogRecPtr, WALInsertLockPadded::l, WALInsertLock::lastImportantAt, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LWLockInitialize(), MemoryContextAllowInCriticalSection(), NUM_XLOGINSERT_LOCKS, XLogCtlData::pages, pfree(), pg_atomic_init_u64(), RECOVERY_STATE_CRASH, XLogCtlData::SharedRecoveryState, ShmemInitStruct(), SpinLockInit, TopMemoryContext, TYPEALIGN, XLogCtlData::unloggedLSN, XLogCtlInsert::WALInsertLocks, WALInsertLocks, XLogCtlData::WalWriterSleeping, XLogCtlData::xlblocks, XLOGbuffers, XLogCtlData::XLogCacheBlck, XLogCtl, and XLOGShmemSize().

Referenced by CreateOrAttachShmemStructs().

◆ XLOGShmemSize()

Size XLOGShmemSize ( void  )

Definition at line 4924 of file xlog.c.

4925{
 /*
  * Compute the shared memory needed for XLogCtl and everything laid out
  * after it: the insertion locks, the xlblocks array, alignment padding
  * and the WAL page buffers. Sizes are combined with add_size() and
  * mul_size().
  *
  * Side effect: if XLOGbuffers is still -1 on entry, wal_buffers is set
  * through SetConfigOption() to the value chosen by
  * XLOGChooseNumBuffers(), so XLOGbuffers is positive afterwards (this is
  * asserted).
  *
  * NOTE(review): listing lines 4945 and 4948 are absent from this
  * rendering, so the trailing arguments of the two SetConfigOption() calls
  * are not shown. Per the comment below they are presumably the
  * PGC_S_DYNAMIC_DEFAULT and PGC_S_OVERRIDE sources.
  */
4926 Size size;
4927
4928 /*
4929 * If the value of wal_buffers is -1, use the preferred auto-tune value.
4930 * This isn't an amazingly clean place to do this, but we must wait till
4931 * NBuffers has received its final value, and must do it before using the
4932 * value of XLOGbuffers to do anything important.
4933 *
4934 * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
4935 * However, if the DBA explicitly set wal_buffers = -1 in the config file,
4936 * then PGC_S_DYNAMIC_DEFAULT will fail to override that and we must force
4937 * the matter with PGC_S_OVERRIDE.
4938 */
4939 if (XLOGbuffers == -1)
4940 {
4941 char buf[32];
4942
4943 snprintf(buf, sizeof(buf), "%d", XLOGChooseNumBuffers());
4944 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4946 if (XLOGbuffers == -1) /* failed to apply it? */
4947 SetConfigOption("wal_buffers", buf, PGC_POSTMASTER,
4949 }
4950 Assert(XLOGbuffers > 0);
4951
4952 /* XLogCtl */
4953 size = sizeof(XLogCtlData);
4954
4955 /* WAL insertion locks, plus alignment */
 /* the extra "+ 1" entry is the room consumed by aligning the lock array */
4956 size = add_size(size, mul_size(sizeof(WALInsertLockPadded), NUM_XLOGINSERT_LOCKS + 1));
4957 /* xlblocks array */
4958 size = add_size(size, mul_size(sizeof(pg_atomic_uint64), XLOGbuffers));
4959 /* extra alignment padding for XLOG I/O buffers */
4960 size = add_size(size, Max(XLOG_BLCKSZ, PG_IO_ALIGN_SIZE));
4961 /* and the buffers themselves */
4962 size = add_size(size, mul_size(XLOG_BLCKSZ, XLOGbuffers));
4963
4964 /*
4965 * Note: we don't count ControlFileData, it comes out of the "slop factor"
4966 * added by CreateSharedMemoryAndSemaphores. This lets us use this
4967 * routine again below to compute the actual allocation size.
4968 */
4969
4970 return size;
4971}
#define Max(x, y)
Definition: c.h:1002
@ PGC_S_OVERRIDE
Definition: guc.h:123
@ PGC_POSTMASTER
Definition: guc.h:74
#define PG_IO_ALIGN_SIZE
Size add_size(Size s1, Size s2)
Definition: shmem.c:494
Size mul_size(Size s1, Size s2)
Definition: shmem.c:511

References add_size(), Assert(), buf, Max, mul_size(), NUM_XLOGINSERT_LOCKS, PG_IO_ALIGN_SIZE, PGC_POSTMASTER, PGC_S_DYNAMIC_DEFAULT, PGC_S_OVERRIDE, SetConfigOption(), snprintf, XLOGbuffers, and XLOGChooseNumBuffers().

Referenced by CalculateShmemSize(), and XLOGShmemInit().

◆ XLogShutdownWalRcv()

void XLogShutdownWalRcv ( void  )

Definition at line 9537 of file xlog.c.

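9538{
9539 Assert(AmStartupProcess() || !IsUnderPostmaster);
9540
9541 ShutdownWalRcv();
9542 ResetInstallXLogFileSegmentActive();
9543}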
#define AmStartupProcess()
Definition: miscadmin.h:390
void ShutdownWalRcv(void)
void ResetInstallXLogFileSegmentActive(void)
Definition: xlog.c:9556

References AmStartupProcess, Assert(), IsUnderPostmaster, ResetInstallXLogFileSegmentActive(), and ShutdownWalRcv().

Referenced by FinishWalRecovery(), and WaitForWALToBecomeAvailable().

◆ XLogWrite()

static void XLogWrite ( XLogwrtRqst  WriteRqst,
TimeLineID  tli,
bool  flexible 
)
static

Definition at line 2307 of file xlog.c.

2308{
2309 bool ispartialpage;
2310 bool last_iteration;
2311 bool finishing_seg;
2312 int curridx;
2313 int npages;
2314 int startidx;
2315 uint32 startoffset;
2316
2317 /* We should always be inside a critical section here */
2318 Assert(CritSectionCount > 0);
2319
2320 /*
2321 * Update local LogwrtResult (caller probably did this already, but...)
2322 */
2323 RefreshXLogWriteResult(LogwrtResult);
2324
2325 /*
2326 * Since successive pages in the xlog cache are consecutively allocated,
2327 * we can usually gather multiple pages together and issue just one
2328 * write() call. npages is the number of pages we have determined can be
2329 * written together; startidx is the cache block index of the first one,
2330 * and startoffset is the file offset at which it should go. The latter
2331 * two variables are only valid when npages > 0, but we must initialize
2332 * all of them to keep the compiler quiet.
2333 */
2334 npages = 0;
2335 startidx = 0;
2336 startoffset = 0;
2337
2338 /*
2339 * Within the loop, curridx is the cache block index of the page to
2340 * consider writing. Begin at the buffer containing the next unwritten
2341 * page, or last partially written page.
2342 */
2343 curridx = XLogRecPtrToBufIdx(LogwrtResult.Write);
2344
2345 while (LogwrtResult.Write < WriteRqst.Write)
2346 {
2347 /*
2348 * Make sure we're not ahead of the insert process. This could happen
2349 * if we're passed a bogus WriteRqst.Write that is past the end of the
2350 * last page that's been initialized by AdvanceXLInsertBuffer.
2351 */
2352 XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
2353
2354 if (LogwrtResult.Write >= EndPtr)
2355 elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
2356 LSN_FORMAT_ARGS(LogwrtResult.Write),
2357 LSN_FORMAT_ARGS(EndPtr));
2358
2359 /* Advance LogwrtResult.Write to end of current buffer page */
2360 LogwrtResult.Write = EndPtr;
2361 ispartialpage = WriteRqst.Write < LogwrtResult.Write;
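2362
2363 if (!XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2364 wal_segment_size))
2365 {
2366 /*
2367 * Switch to new logfile segment. We cannot have any pending
2368 * pages here (since we dump what we have at segment end).
2369 */
2370 Assert(npages == 0);
2371 if (openLogFile >= 0)
2372 XLogFileClose();
2373 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2374 wal_segment_size);
2375 openLogTLI = tli;
2376
2377 /* create/use new log file */
2378 openLogFile = XLogFileInit(openLogSegNo, tli);
2379 ReserveExternalFD();
2380 }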
2381
2382 /* Make sure we have the current logfile open */
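2383 if (openLogFile < 0)
2384 {
2385 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2386 wal_segment_size);
2387 openLogTLI = tli;
2388 openLogFile = XLogFileOpen(openLogSegNo, tli);
2389 ReserveExternalFD();
2390 }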
2391
2392 /* Add current page to the set of pending pages-to-dump */
2393 if (npages == 0)
2394 {
2395 /* first of group */
2396 startidx = curridx;
2397 startoffset = XLogSegmentOffset(LogwrtResult.Write - XLOG_BLCKSZ,
2398 wal_segment_size);
2399 }
2400 npages++;
2401
2402 /*
2403 * Dump the set if this will be the last loop iteration, or if we are
2404 * at the last page of the cache area (since the next page won't be
2405 * contiguous in memory), or if we are at the end of the logfile
2406 * segment.
2407 */
2408 last_iteration = WriteRqst.Write <= LogwrtResult.Write;
2409
2410 finishing_seg = !ispartialpage &&
2411 (startoffset + npages * XLOG_BLCKSZ) >= wal_segment_size;
2412
2413 if (last_iteration ||
2414 curridx == XLogCtl->XLogCacheBlck ||
2415 finishing_seg)
2416 {
2417 char *from;
2418 Size nbytes;
2419 Size nleft;
2420 ssize_t written;
2421 instr_time start;
2422
2423 /* OK to write the page(s) */
2424 from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
2425 nbytes = npages * (Size) XLOG_BLCKSZ;
2426 nleft = nbytes;
2427 do
2428 {
2429 errno = 0;
2430
2431 /*
2432 * Measure I/O timing to write WAL data, for pg_stat_io.
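2433 */
2434 start = pgstat_prepare_io_time(track_wal_io_timing);
2435
2436 pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2437 written = pg_pwrite(openLogFile, from, nleft, startoffset);
2438 pgstat_report_wait_end();
2439
2440 pgstat_count_io_op_time(IOOBJECT_WAL, IOCONTEXT_NORMAL,
2441 IOOP_WRITE, start, 1, written);
2442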
2443 if (written <= 0)
2444 {
2445 char xlogfname[MAXFNAMELEN];
2446 int save_errno;
2447
2448 if (errno == EINTR)
2449 continue;
2450
2451 save_errno = errno;
2452 XLogFileName(xlogfname, tli, openLogSegNo,
2453 wal_segment_size);
2454 errno = save_errno;
2455 ereport(PANIC,
2456 (errcode_for_file_access(),
2457 errmsg("could not write to log file \"%s\" at offset %u, length %zu: %m",
2458 xlogfname, startoffset, nleft)));
2459 }
2460 nleft -= written;
2461 from += written;
2462 startoffset += written;
2463 } while (nleft > 0);
2464
2465 npages = 0;
2466
2467 /*
2468 * If we just wrote the whole last page of a logfile segment,
2469 * fsync the segment immediately. This avoids having to go back
2470 * and re-open prior segments when an fsync request comes along
2471 * later. Doing it here ensures that one and only one backend will
2472 * perform this fsync.
2473 *
2474 * This is also the right place to notify the Archiver that the
2475 * segment is ready to copy to archival storage, and to update the
2476 * timer for archive_timeout, and to signal for a checkpoint if
2477 * too many logfile segments have been used since the last
2478 * checkpoint.
2479 */
2480 if (finishing_seg)
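2481 {
2482 issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2483
2484 /* signal that we need to wakeup walsenders later */
2485 WalSndWakeupRequest();
2486
2487 LogwrtResult.Flush = LogwrtResult.Write; /* end of page */
2488
2489 if (XLogArchivingActive())
2490 XLogArchiveNotifySeg(openLogSegNo, tli);
2491
2492 XLogCtl->lastSegSwitchTime = (pg_time_t) time(NULL);
2493 XLogCtl->lastSegSwitchLSN = LogwrtResult.Flush;
2494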
2495 /*
2496 * Request a checkpoint if we've consumed too much xlog since
2497 * the last one. For speed, we first check using the local
2498 * copy of RedoRecPtr, which might be out of date; if it looks
2499 * like a checkpoint is needed, forcibly update RedoRecPtr and
2500 * recheck.
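2501 */
2502 if (IsUnderPostmaster && XLogCheckpointNeeded(openLogSegNo))
2503 {
2504 (void) GetRedoRecPtr();
2505 if (XLogCheckpointNeeded(openLogSegNo))
2506 RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
2507 }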
2508 }
2509 }
2510
2511 if (ispartialpage)
2512 {
2513 /* Only asked to write a partial page */
2514 LogwrtResult.Write = WriteRqst.Write;
2515 break;
2516 }
2517 curridx = NextBufIdx(curridx);
2518
2519 /* If flexible, break out of loop as soon as we wrote something */
2520 if (flexible && npages == 0)
2521 break;
2522 }
2523
2524 Assert(npages == 0);
2525
2526 /*
2527 * If asked to flush, do so
2528 */
2529 if (LogwrtResult.Flush < WriteRqst.Flush &&
2530 LogwrtResult.Flush < LogwrtResult.Write)
2531 {
2532 /*
2533 * Could get here without iterating above loop, in which case we might
2534 * have no open file or the wrong one. However, we do not need to
2535 * fsync more than one file.
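2536 */
2537 if (wal_sync_method != WAL_SYNC_METHOD_OPEN &&
2538 wal_sync_method != WAL_SYNC_METHOD_OPEN_DSYNC)
2539 {
2540 if (openLogFile >= 0 &&
2541 !XLByteInPrevSeg(LogwrtResult.Write, openLogSegNo,
2542 wal_segment_size))
2543 XLogFileClose();
2544 if (openLogFile < 0)
2545 {
2546 XLByteToPrevSeg(LogwrtResult.Write, openLogSegNo,
2547 wal_segment_size);
2548 openLogTLI = tli;
2549 openLogFile = XLogFileOpen(openLogSegNo, tli);
2550 ReserveExternalFD();
2551 }
2552
2553 issue_xlog_fsync(openLogFile, openLogSegNo, tli);
2554 }
2555
2556 /* signal that we need to wakeup walsenders later */
2557 WalSndWakeupRequest();
2558
2559 LogwrtResult.Flush = LogwrtResult.Write;
2560 }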
2561
2562 /*
2563 * Update shared-memory status
2564 *
2565 * We make sure that the shared 'request' values do not fall behind the
2566 * 'result' values. This is not absolutely essential, but it saves some
2567 * code in a couple of places.
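2568 */
2569 SpinLockAcquire(&XLogCtl->info_lck);
2570 if (XLogCtl->LogwrtRqst.Write < LogwrtResult.Write)
2571 XLogCtl->LogwrtRqst.Write = LogwrtResult.Write;
2572 if (XLogCtl->LogwrtRqst.Flush < LogwrtResult.Flush)
2573 XLogCtl->LogwrtRqst.Flush = LogwrtResult.Flush;
2574 SpinLockRelease(&XLogCtl->info_lck);
2575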
2576 /*
2577 * We write Write first, bar, then Flush. When reading, the opposite must
2578 * be done (with a matching barrier in between), so that we always see a
2579 * Flush value that trails behind the Write value seen.
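2580 */
2581 pg_atomic_write_u64(&XLogCtl->logWriteResult, LogwrtResult.Write);
2582 pg_write_barrier();
2583 pg_atomic_write_u64(&XLogCtl->logFlushResult, LogwrtResult.Flush);
2584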
2585#ifdef USE_ASSERT_CHECKING
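2586 {
2587 XLogRecPtr Flush;
2588 XLogRecPtr Write;
2589 XLogRecPtr Insert;
2590
2591 Flush = pg_atomic_read_u64(&XLogCtl->logFlushResult);
2592 pg_read_barrier();
2593 Write = pg_atomic_read_u64(&XLogCtl->logWriteResult);
2594 pg_read_barrier();
2595 Insert = pg_atomic_read_u64(&XLogCtl->logInsertResult);
2596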
2597 /* WAL written to disk is always ahead of WAL flushed */
2598 Assert(Write >= Flush);
2599
2600 /* WAL inserted to buffers is always ahead of WAL written */
2601 Assert(Insert >= Write);
2602 }
2603#endif
2604}
void ReserveExternalFD(void)
Definition: fd.c:1220
volatile uint32 CritSectionCount
Definition: globals.c:45
XLogRecPtr Flush
Definition: walreceiver.c:112
XLogRecPtr Write
Definition: walreceiver.c:111
#define WalSndWakeupRequest()
Definition: walsender.h:58
#define EINTR
Definition: win32_port.h:364
XLogRecPtr GetRedoRecPtr(void)
Definition: xlog.c:6509
int XLogFileOpen(XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:3637
#define NextBufIdx(idx)
Definition: xlog.c:585
void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
Definition: xlog.c:8759
bool XLogCheckpointNeeded(XLogSegNo new_segno)
Definition: xlog.c:2283
void XLogArchiveNotifySeg(XLogSegNo segno, TimeLineID tli)
Definition: xlogarchive.c:492

References Assert(), CHECKPOINT_CAUSE_XLOG, CritSectionCount, EINTR, elog, ereport, errcode_for_file_access(), errmsg(), XLogwrtRqst::Flush, XLogwrtResult::Flush, Flush, GetRedoRecPtr(), XLogCtlData::info_lck, Insert(), IOCONTEXT_NORMAL, IOOBJECT_WAL, IOOP_WRITE, issue_xlog_fsync(), IsUnderPostmaster, XLogCtlData::lastSegSwitchLSN, XLogCtlData::lastSegSwitchTime, XLogCtlData::logFlushResult, XLogCtlData::logInsertResult, XLogCtlData::logWriteResult, LogwrtResult, XLogCtlData::LogwrtRqst, LSN_FORMAT_ARGS, MAXFNAMELEN, NextBufIdx, openLogFile, openLogSegNo, openLogTLI, XLogCtlData::pages, PANIC, pg_atomic_read_u64(), pg_atomic_write_u64(), pg_pwrite, pg_read_barrier, pg_write_barrier, pgstat_count_io_op_time(), pgstat_prepare_io_time(), pgstat_report_wait_end(), pgstat_report_wait_start(), RefreshXLogWriteResult, RequestCheckpoint(), ReserveExternalFD(), SpinLockAcquire, SpinLockRelease, start, track_wal_io_timing, wal_segment_size, wal_sync_method, WAL_SYNC_METHOD_OPEN, WAL_SYNC_METHOD_OPEN_DSYNC, WalSndWakeupRequest, XLogwrtRqst::Write, XLogwrtResult::Write, Write, XLogCtlData::xlblocks, XLByteInPrevSeg, XLByteToPrevSeg, XLogArchiveNotifySeg(), XLogArchivingActive, XLogCtlData::XLogCacheBlck, XLogCheckpointNeeded(), XLogCtl, XLogFileClose(), XLogFileInit(), XLogFileName(), XLogFileOpen(), XLogRecPtrToBufIdx, and XLogSegmentOffset.

Referenced by AdvanceXLInsertBuffer(), XLogBackgroundFlush(), and XLogFlush().

Variable Documentation

◆ archive_mode_options

const struct config_enum_entry archive_mode_options[]
Initial value:
= {
{"always", ARCHIVE_MODE_ALWAYS, false},
{"on", ARCHIVE_MODE_ON, false},
{"off", ARCHIVE_MODE_OFF, false},
{"true", ARCHIVE_MODE_ON, true},
{"false", ARCHIVE_MODE_OFF, true},
{"yes", ARCHIVE_MODE_ON, true},
{"no", ARCHIVE_MODE_OFF, true},
{"1", ARCHIVE_MODE_ON, true},
{"0", ARCHIVE_MODE_OFF, true},
{NULL, 0, false}
}
@ ARCHIVE_MODE_ALWAYS
Definition: xlog.h:67
@ ARCHIVE_MODE_OFF
Definition: xlog.h:65
@ ARCHIVE_MODE_ON
Definition: xlog.h:66

Definition at line 193 of file xlog.c.

◆ check_wal_consistency_checking_deferred

bool check_wal_consistency_checking_deferred = false
static

Definition at line 168 of file xlog.c.

Referenced by check_wal_consistency_checking(), and InitializeWalConsistencyChecking().

◆ CheckPointDistanceEstimate

double CheckPointDistanceEstimate = 0
static

Definition at line 161 of file xlog.c.

Referenced by LogCheckpointEnd(), UpdateCheckPointDistanceEstimate(), and XLOGfileslop().

◆ CheckPointSegments

int CheckPointSegments

◆ CheckpointStats

◆ CommitDelay

int CommitDelay = 0

Definition at line 134 of file xlog.c.

Referenced by XLogFlush().

◆ CommitSiblings

int CommitSiblings = 5

Definition at line 135 of file xlog.c.

Referenced by XLogFlush().

◆ ControlFile

◆ doPageWrites

bool doPageWrites
static

◆ EnableHotStandby

◆ fullPageWrites

bool fullPageWrites = true

Definition at line 124 of file xlog.c.

Referenced by BootStrapXLOG(), and UpdateFullPageWrites().

◆ holdingAllLocks

bool holdingAllLocks = false
static

◆ lastFullPageWrites

bool lastFullPageWrites
static

Definition at line 219 of file xlog.c.

Referenced by StartupXLOG(), and xlog_redo().

◆ LocalMinRecoveryPoint

XLogRecPtr LocalMinRecoveryPoint
static

◆ LocalMinRecoveryPointTLI

TimeLineID LocalMinRecoveryPointTLI
static

◆ LocalRecoveryInProgress

bool LocalRecoveryInProgress = true
static

Definition at line 226 of file xlog.c.

Referenced by RecoveryInProgress().

◆ LocalXLogInsertAllowed

int LocalXLogInsertAllowed = -1
static

Definition at line 238 of file xlog.c.

Referenced by CreateCheckPoint(), LocalSetXLogInsertAllowed(), and XLogInsertAllowed().

◆ log_checkpoints

bool log_checkpoints = true

◆ LogwrtResult

◆ max_slot_wal_keep_size_mb

int max_slot_wal_keep_size_mb = -1

Definition at line 137 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ max_wal_size_mb

int max_wal_size_mb = 1024

◆ min_wal_size_mb

int min_wal_size_mb = 80

Definition at line 117 of file xlog.c.

Referenced by ReadControlFile(), and XLOGfileslop().

◆ MyLockNo

int MyLockNo = 0
static

◆ openLogFile

int openLogFile = -1
static

◆ openLogSegNo

XLogSegNo openLogSegNo = 0
static

Definition at line 637 of file xlog.c.

Referenced by assign_wal_sync_method(), XLogBackgroundFlush(), XLogFileClose(), and XLogWrite().

◆ openLogTLI

TimeLineID openLogTLI = 0
static

Definition at line 638 of file xlog.c.

Referenced by assign_wal_sync_method(), BootStrapXLOG(), XLogFileClose(), and XLogWrite().

◆ PrevCheckPointDistance

double PrevCheckPointDistance = 0
static

Definition at line 162 of file xlog.c.

Referenced by LogCheckpointEnd(), and UpdateCheckPointDistanceEstimate().

◆ ProcLastRecPtr

◆ RedoRecPtr

◆ sessionBackupState

SessionBackupState sessionBackupState = SESSION_BACKUP_NONE
static

◆ track_wal_io_timing

◆ updateMinRecoveryPoint

bool updateMinRecoveryPoint = true
static

Definition at line 649 of file xlog.c.

Referenced by SwitchIntoArchiveRecovery(), UpdateMinRecoveryPoint(), and XLogNeedsFlush().

◆ UsableBytesInSegment

int UsableBytesInSegment
static

◆ wal_compression

int wal_compression = WAL_COMPRESSION_NONE

Definition at line 126 of file xlog.c.

Referenced by XLogCompressBackupBlock(), and XLogRecordAssemble().

◆ wal_consistency_checking

bool* wal_consistency_checking = NULL

Definition at line 128 of file xlog.c.

Referenced by assign_wal_consistency_checking(), and XLogRecordAssemble().

◆ wal_consistency_checking_string

char* wal_consistency_checking_string = NULL

Definition at line 127 of file xlog.c.

Referenced by InitializeWalConsistencyChecking().

◆ wal_decode_buffer_size

int wal_decode_buffer_size = 512 * 1024

Definition at line 138 of file xlog.c.

Referenced by InitWalRecovery().

◆ wal_init_zero

bool wal_init_zero = true

Definition at line 129 of file xlog.c.

Referenced by XLogFileInitInternal().

◆ wal_keep_size_mb

int wal_keep_size_mb = 0

Definition at line 118 of file xlog.c.

Referenced by KeepLogSeg(), and pg_get_replication_slots().

◆ wal_level

◆ wal_log_hints

bool wal_log_hints = false

Definition at line 125 of file xlog.c.

Referenced by InitControlFile(), and XLogReportParameters().

◆ wal_recycle

bool wal_recycle = true

Definition at line 130 of file xlog.c.

Referenced by RemoveXlogFile().

◆ wal_retrieve_retry_interval

int wal_retrieve_retry_interval = 5000

Definition at line 136 of file xlog.c.

Referenced by ApplyLauncherMain(), launch_sync_worker(), and WaitForWALToBecomeAvailable().

◆ wal_segment_size

int wal_segment_size = DEFAULT_XLOG_SEG_SIZE

Definition at line 145 of file xlog.c.

Referenced by AdvanceXLInsertBuffer(), assign_wal_sync_method(), BootStrapXLOG(), build_backup_content(), CalculateCheckpointSegments(), CheckArchiveTimeout(), CheckXLogRemoved(), CleanupAfterArchiveRecovery(), copy_replication_slot(), CopyXLogRecordToWAL(), CreateCheckPoint(), CreateOverwriteContrecordRecord(), CreateRestartPoint(), do_pg_backup_stop(), ExecuteRecoveryCommand(), FinishWalRecovery(), GetOldestUnsummarizedLSN(), GetWALAvailability(), GetXLogBuffer(), InitWalRecovery(), InitXLogReaderState(), InstallXLogFileSegment(), InvalidateObsoleteReplicationSlots(), IsCheckpointOnSchedule(), issue_xlog_fsync(), KeepLogSeg(), LogicalConfirmReceivedLocation(), MaybeRemoveOldWalSummaries(), perform_base_backup(), pg_control_checkpoint(), pg_get_replication_slots(), pg_split_walfile_name(), pg_walfile_name(), pg_walfile_name_offset(), PreallocXlogFiles(), ReadControlFile(), ReadRecord(), RemoveNonParentXlogFiles(), RemoveOldXlogFiles(), ReorderBufferRestoreChanges(), ReorderBufferRestoreCleanup(), ReorderBufferSerializedPath(), ReorderBufferSerializeTXN(), ReplicationSlotReserveWal(), RequestXLogStreaming(), reserve_wal_for_local_slot(), ReserveXLogSwitch(), RestoreArchivedFile(), StartReplication(), StartupDecodingContext(), SummarizeWAL(), UpdateLastRemovedPtr(), WALReadRaiseError(), WalReceiverMain(), WalSndSegmentOpen(), WriteControlFile(), XLogArchiveNotifySeg(), XLogBackgroundFlush(), XLogBytePosToEndRecPtr(), XLogBytePosToRecPtr(), XLogCheckpointNeeded(), XLOGChooseNumBuffers(), XLogFileClose(), XLogFileCopy(), XLogFileInitInternal(), XLogFileOpen(), XLogFileRead(), XLogFileReadAnyTLI(), XLOGfileslop(), XLogGetOldestSegno(), XLogInitNewTimeline(), XLogInsertRecord(), XLogPageRead(), XLogReaderAllocate(), XlogReadTwoPhaseData(), XLogRecPtrToBytePos(), XLogWalRcvClose(), XLogWalRcvWrite(), and XLogWrite().

◆ wal_sync_method

◆ wal_sync_method_options

const struct config_enum_entry wal_sync_method_options[]
Initial value:
= {
{"fsync", WAL_SYNC_METHOD_FSYNC, false},
{"fdatasync", WAL_SYNC_METHOD_FDATASYNC, false},
{NULL, 0, false}
}

Definition at line 173 of file xlog.c.

◆ WALInsertLocks

◆ XactLastCommitEnd

◆ XactLastRecEnd

◆ XLogArchiveCommand

char* XLogArchiveCommand = NULL

◆ XLogArchiveMode

◆ XLogArchiveTimeout

int XLogArchiveTimeout = 0

Definition at line 120 of file xlog.c.

Referenced by CheckArchiveTimeout(), and CheckpointerMain().

◆ XLOGbuffers

int XLOGbuffers = -1

Definition at line 119 of file xlog.c.

Referenced by check_wal_buffers(), XLOGShmemInit(), and XLOGShmemSize().

◆ XLogCtl