PostgreSQL Source Code git master
pg_stat_statements.c
Go to the documentation of this file.
1/*-------------------------------------------------------------------------
2 *
3 * pg_stat_statements.c
4 * Track statement planning and execution times as well as resource
5 * usage across a whole database cluster.
6 *
7 * Execution costs are totaled for each distinct source query, and kept in
8 * a shared hashtable. (We track only as many distinct queries as will fit
9 * in the designated amount of shared memory.)
10 *
11 * Starting in Postgres 9.2, this module normalized query entries. As of
12 * Postgres 14, the normalization is done by the core if compute_query_id is
13 * enabled, or optionally by third-party modules.
14 *
15 * To facilitate presenting entries to users, we create "representative" query
16 * strings in which constants are replaced with parameter symbols ($n), to
17 * make it clearer what a normalized entry can represent. To save on shared
18 * memory, and to avoid having to truncate oversized query strings, we store
19 * these strings in a temporary external query-texts file. Offsets into this
20 * file are kept in shared memory.
21 *
22 * Note about locking issues: to create or delete an entry in the shared
23 * hashtable, one must hold pgss->lock exclusively. Modifying any field
24 * in an entry except the counters requires the same. To look up an entry,
25 * one must hold the lock shared. To read or update the counters within
26 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
27 * disappear!) and also take the entry's mutex spinlock.
28 * The shared state variable pgss->extent (the next free spot in the external
29 * query-text file) should be accessed only while holding either the
30 * pgss->mutex spinlock, or exclusive lock on pgss->lock. We use the mutex to
31 * allow reserving file space while holding only shared lock on pgss->lock.
32 * Rewriting the entire external query-text file, eg for garbage collection,
33 * requires holding pgss->lock exclusively; this allows individual entries
34 * in the file to be read or written while holding only shared lock.
35 *
36 *
37 * Copyright (c) 2008-2025, PostgreSQL Global Development Group
38 *
39 * IDENTIFICATION
40 * contrib/pg_stat_statements/pg_stat_statements.c
41 *
42 *-------------------------------------------------------------------------
43 */
44#include "postgres.h"
45
46#include <math.h>
47#include <sys/stat.h>
48#include <unistd.h>
49
50#include "access/htup_details.h"
51#include "access/parallel.h"
52#include "catalog/pg_authid.h"
53#include "common/int.h"
54#include "executor/instrument.h"
55#include "funcapi.h"
56#include "jit/jit.h"
57#include "mb/pg_wchar.h"
58#include "miscadmin.h"
59#include "nodes/queryjumble.h"
60#include "optimizer/planner.h"
61#include "parser/analyze.h"
62#include "parser/scanner.h"
63#include "pgstat.h"
64#include "storage/fd.h"
65#include "storage/ipc.h"
66#include "storage/lwlock.h"
67#include "storage/shmem.h"
68#include "storage/spin.h"
69#include "tcop/utility.h"
70#include "utils/acl.h"
71#include "utils/builtins.h"
72#include "utils/memutils.h"
73#include "utils/timestamp.h"
74
76 .name = "pg_stat_statements",
77 .version = PG_VERSION
78);
79
80/* Location of permanent stats file (valid when database is shut down) */
81#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
82
83/*
84 * Location of external query text file.
85 */
86#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
87
88/* Magic number identifying the stats file format */
89static const uint32 PGSS_FILE_HEADER = 0x20250731;
90
91/* PostgreSQL major version number, changes in which invalidate all entries */
92static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
93
/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
#define USAGE_EXEC(duration)	(1.0)	/* usage credited per execution; the
										 * duration argument is currently
										 * ignored */
#define USAGE_INIT				(1.0)	/* including initial planning */
#define ASSUMED_MEDIAN_INIT		(10.0)	/* initial assumed median usage */
#define ASSUMED_LENGTH_INIT		1024	/* initial assumed mean query length */
#define USAGE_DECREASE_FACTOR	(0.99)	/* decreased every entry_dealloc */
#define STICKY_DECREASE_FACTOR	(0.50)	/* factor for sticky entries */
#define USAGE_DEALLOC_PERCENT	5		/* free this % of entries at once */

/*
 * An entry is "sticky" while both its planning and execution call counters
 * are still zero.  "c" is a counters struct lvalue with a calls[] array
 * indexed by PGSS_PLAN / PGSS_EXEC.  The argument is parenthesized so that
 * expressions such as *ptr can be passed safely.
 */
#define IS_STICKY(c)	(((c).calls[PGSS_PLAN] + (c).calls[PGSS_EXEC]) == 0)
103
104/*
105 * Extension version number, for supporting older extension versions' objects
106 */
107typedef enum pgssVersion
108{
120
121typedef enum pgssStoreKind
122{
124
125 /*
126 * PGSS_PLAN and PGSS_EXEC must be respectively 0 and 1 as they're used to
127 * reference the underlying values in the arrays in the Counters struct,
128 * and this order is required in pg_stat_statements_internal().
129 */
133
134#define PGSS_NUMKIND (PGSS_EXEC + 1)
135
136/*
137 * Hashtable key that defines the identity of a hashtable entry. We separate
138 * queries by user and by database even if they are otherwise identical.
139 *
140 * If you add a new key to this struct, make sure to teach pgss_store() to
141 * zero the padding bytes. Otherwise, things will break, because pgss_hash is
142 * created using HASH_BLOBS, and thus tag_hash is used to hash this.
143 */
144typedef struct pgssHashKey
145{
146 Oid userid; /* user OID */
147 Oid dbid; /* database OID */
148 int64 queryid; /* query identifier */
149 bool toplevel; /* query executed at top level */
151
152/*
153 * The actual stats counters kept within pgssEntry.
154 */
155typedef struct Counters
156{
157 int64 calls[PGSS_NUMKIND]; /* # of times planned/executed */
158 double total_time[PGSS_NUMKIND]; /* total planning/execution time,
159 * in msec */
160 double min_time[PGSS_NUMKIND]; /* minimum planning/execution time in
161 * msec since min/max reset */
162 double max_time[PGSS_NUMKIND]; /* maximum planning/execution time in
163 * msec since min/max reset */
164 double mean_time[PGSS_NUMKIND]; /* mean planning/execution time in
165 * msec */
166 double sum_var_time[PGSS_NUMKIND]; /* sum of variances in
167 * planning/execution time in msec */
168 int64 rows; /* total # of retrieved or affected rows */
169 int64 shared_blks_hit; /* # of shared buffer hits */
170 int64 shared_blks_read; /* # of shared disk blocks read */
171 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
172 int64 shared_blks_written; /* # of shared disk blocks written */
173 int64 local_blks_hit; /* # of local buffer hits */
174 int64 local_blks_read; /* # of local disk blocks read */
175 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
176 int64 local_blks_written; /* # of local disk blocks written */
177 int64 temp_blks_read; /* # of temp blocks read */
178 int64 temp_blks_written; /* # of temp blocks written */
179 double shared_blk_read_time; /* time spent reading shared blocks,
180 * in msec */
181 double shared_blk_write_time; /* time spent writing shared blocks,
182 * in msec */
183 double local_blk_read_time; /* time spent reading local blocks, in
184 * msec */
185 double local_blk_write_time; /* time spent writing local blocks, in
186 * msec */
187 double temp_blk_read_time; /* time spent reading temp blocks, in msec */
188 double temp_blk_write_time; /* time spent writing temp blocks, in
189 * msec */
190 double usage; /* usage factor */
191 int64 wal_records; /* # of WAL records generated */
192 int64 wal_fpi; /* # of WAL full page images generated */
193 uint64 wal_bytes; /* total amount of WAL generated in bytes */
194 int64 wal_buffers_full; /* # of times the WAL buffers became full */
195 int64 jit_functions; /* total number of JIT functions emitted */
196 double jit_generation_time; /* total time to generate jit code */
197 int64 jit_inlining_count; /* number of times inlining time has been
198 * > 0 */
199 double jit_deform_time; /* total time to deform tuples in jit code */
200 int64 jit_deform_count; /* number of times deform time has been >
201 * 0 */
202
203 double jit_inlining_time; /* total time to inline jit code */
204 int64 jit_optimization_count; /* number of times optimization time
205 * has been > 0 */
206 double jit_optimization_time; /* total time to optimize jit code */
207 int64 jit_emission_count; /* number of times emission time has been
208 * > 0 */
209 double jit_emission_time; /* total time to emit jit code */
210 int64 parallel_workers_to_launch; /* # of parallel workers planned
211 * to be launched */
212 int64 parallel_workers_launched; /* # of parallel workers actually
213 * launched */
214 int64 generic_plan_calls; /* number of calls using a generic plan */
215 int64 custom_plan_calls; /* number of calls using a custom plan */
217
218/*
219 * Global statistics for pg_stat_statements
220 */
221typedef struct pgssGlobalStats
222{
223 int64 dealloc; /* # of times entries were deallocated */
224 TimestampTz stats_reset; /* timestamp with all stats reset */
226
227/*
228 * Statistics per statement
229 *
230 * Note: in event of a failure in garbage collection of the query text file,
231 * we reset query_offset to zero and query_len to -1. This will be seen as
232 * an invalid state by qtext_fetch().
233 */
234typedef struct pgssEntry
235{
236 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
237 Counters counters; /* the statistics for this query */
238 Size query_offset; /* query text offset in external file */
239 int query_len; /* # of valid bytes in query string, or -1 */
240 int encoding; /* query text encoding */
241 TimestampTz stats_since; /* timestamp of entry allocation */
242 TimestampTz minmax_stats_since; /* timestamp of last min/max values reset */
243 slock_t mutex; /* protects the counters only */
245
246/*
247 * Global shared state
248 */
249typedef struct pgssSharedState
250{
251 LWLock *lock; /* protects hashtable search/modification */
252 double cur_median_usage; /* current median usage in hashtable */
253 Size mean_query_len; /* current mean entry text length */
254 slock_t mutex; /* protects following fields only: */
255 Size extent; /* current extent of query file */
256 int n_writers; /* number of active writers to query file */
257 int gc_count; /* query file garbage collection cycle count */
258 pgssGlobalStats stats; /* global statistics for pgss */
260
261/*---- Local variables ----*/
262
263/* Current nesting depth of planner/ExecutorRun/ProcessUtility calls */
264static int nesting_level = 0;
265
266/* Saved hook values */
276
277/* Links to shared memory state */
278static pgssSharedState *pgss = NULL;
279static HTAB *pgss_hash = NULL;
280
281/*---- GUC variables ----*/
282
283typedef enum
284{
285 PGSS_TRACK_NONE, /* track no statements */
286 PGSS_TRACK_TOP, /* only top level statements */
287 PGSS_TRACK_ALL, /* all statements, including nested ones */
289
290static const struct config_enum_entry track_options[] =
291{
292 {"none", PGSS_TRACK_NONE, false},
293 {"top", PGSS_TRACK_TOP, false},
294 {"all", PGSS_TRACK_ALL, false},
295 {NULL, 0, false}
296};
297
298static int pgss_max = 5000; /* max # statements to track */
299static int pgss_track = PGSS_TRACK_TOP; /* tracking level */
300static bool pgss_track_utility = true; /* whether to track utility commands */
301static bool pgss_track_planning = false; /* whether to track planning
302 * duration */
303static bool pgss_save = true; /* whether to save stats across shutdown */
304
/*
 * pgss_enabled(level)
 *		True when a statement at nesting depth "level" should be tracked:
 *		never inside a parallel worker, always when tracking "all", and only
 *		at depth zero when tracking "top".
 */
#define pgss_enabled(level) \
	(!IsParallelWorker() && \
	 (pgss_track == PGSS_TRACK_ALL || \
	  (pgss_track == PGSS_TRACK_TOP && (level) == 0)))

/*
 * record_gc_qtexts()
 *		Bump the shared garbage-collection cycle counter, holding the
 *		pgss->mutex spinlock around the update.
 */
#define record_gc_qtexts() \
	do { \
		SpinLockAcquire(&pgss->mutex); \
		pgss->gc_count += 1; \
		SpinLockRelease(&pgss->mutex); \
	} while (0)
316
317/*---- Function declarations ----*/
318
332
333static void pgss_shmem_request(void);
334static void pgss_shmem_startup(void);
335static void pgss_shmem_shutdown(int code, Datum arg);
336static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
337 JumbleState *jstate);
339 const char *query_string,
340 int cursorOptions,
341 ParamListInfo boundParams,
342 ExplainState *es);
343static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
344static void pgss_ExecutorRun(QueryDesc *queryDesc,
345 ScanDirection direction,
346 uint64 count);
347static void pgss_ExecutorFinish(QueryDesc *queryDesc);
348static void pgss_ExecutorEnd(QueryDesc *queryDesc);
349static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
350 bool readOnlyTree,
351 ProcessUtilityContext context, ParamListInfo params,
352 QueryEnvironment *queryEnv,
354static void pgss_store(const char *query, int64 queryId,
355 int query_location, int query_len,
356 pgssStoreKind kind,
357 double total_time, uint64 rows,
358 const BufferUsage *bufusage,
359 const WalUsage *walusage,
360 const struct JitInstrumentation *jitusage,
361 JumbleState *jstate,
362 int parallel_workers_to_launch,
363 int parallel_workers_launched,
364 PlannedStmtOrigin planOrigin);
366 pgssVersion api_version,
367 bool showtext);
368static Size pgss_memsize(void);
369static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
370 int encoding, bool sticky);
371static void entry_dealloc(void);
372static bool qtext_store(const char *query, int query_len,
373 Size *query_offset, int *gc_count);
374static char *qtext_load_file(Size *buffer_size);
375static char *qtext_fetch(Size query_offset, int query_len,
376 char *buffer, Size buffer_size);
377static bool need_gc_qtexts(void);
378static void gc_qtexts(void);
379static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only);
380static char *generate_normalized_query(JumbleState *jstate, const char *query,
381 int query_loc, int *query_len_p);
382static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
383 int query_loc);
384static int comp_location(const void *a, const void *b);
385
386
387/*
388 * Module load callback
389 */
390void
392{
393 /*
394 * In order to create our shared memory area, we have to be loaded via
395 * shared_preload_libraries. If not, fall out without hooking into any of
396 * the main system. (We don't throw error here because it seems useful to
397 * allow the pg_stat_statements functions to be created even when the
398 * module isn't active. The functions must protect themselves against
399 * being called then, however.)
400 */
402 return;
403
404 /*
405 * Inform the postmaster that we want to enable query_id calculation if
406 * compute_query_id is set to auto.
407 */
409
410 /*
411 * Define (or redefine) custom GUC variables.
412 */
413 DefineCustomIntVariable("pg_stat_statements.max",
414 "Sets the maximum number of statements tracked by pg_stat_statements.",
415 NULL,
416 &pgss_max,
417 5000,
418 100,
419 INT_MAX / 2,
421 0,
422 NULL,
423 NULL,
424 NULL);
425
426 DefineCustomEnumVariable("pg_stat_statements.track",
427 "Selects which statements are tracked by pg_stat_statements.",
428 NULL,
429 &pgss_track,
432 PGC_SUSET,
433 0,
434 NULL,
435 NULL,
436 NULL);
437
438 DefineCustomBoolVariable("pg_stat_statements.track_utility",
439 "Selects whether utility commands are tracked by pg_stat_statements.",
440 NULL,
442 true,
443 PGC_SUSET,
444 0,
445 NULL,
446 NULL,
447 NULL);
448
449 DefineCustomBoolVariable("pg_stat_statements.track_planning",
450 "Selects whether planning duration is tracked by pg_stat_statements.",
451 NULL,
453 false,
454 PGC_SUSET,
455 0,
456 NULL,
457 NULL,
458 NULL);
459
460 DefineCustomBoolVariable("pg_stat_statements.save",
461 "Save pg_stat_statements statistics across server shutdowns.",
462 NULL,
463 &pgss_save,
464 true,
466 0,
467 NULL,
468 NULL,
469 NULL);
470
471 MarkGUCPrefixReserved("pg_stat_statements");
472
473 /*
474 * Install hooks.
475 */
494}
495
496/*
497 * shmem_request hook: request additional shared resources. We'll allocate or
498 * attach to the shared resources in pgss_shmem_startup().
499 */
500static void
502{
505
507 RequestNamedLWLockTranche("pg_stat_statements", 1);
508}
509
510/*
511 * shmem_startup hook: allocate or attach to shared memory,
512 * then load any pre-existing statistics from file.
513 * Also create and load the query-texts file, which is expected to exist
514 * (even if empty) while the module is enabled.
515 */
516static void
518{
519 bool found;
520 HASHCTL info;
521 FILE *file = NULL;
522 FILE *qfile = NULL;
523 uint32 header;
524 int32 num;
525 int32 pgver;
526 int32 i;
527 int buffer_size;
528 char *buffer = NULL;
529
532
533 /* reset in case this is a restart within the postmaster */
534 pgss = NULL;
535 pgss_hash = NULL;
536
537 /*
538 * Create or attach to the shared memory state, including hash table
539 */
540 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
541
542 pgss = ShmemInitStruct("pg_stat_statements",
543 sizeof(pgssSharedState),
544 &found);
545
546 if (!found)
547 {
548 /* First time through ... */
549 pgss->lock = &(GetNamedLWLockTranche("pg_stat_statements"))->lock;
553 pgss->extent = 0;
554 pgss->n_writers = 0;
555 pgss->gc_count = 0;
556 pgss->stats.dealloc = 0;
558 }
559
560 info.keysize = sizeof(pgssHashKey);
561 info.entrysize = sizeof(pgssEntry);
562 pgss_hash = ShmemInitHash("pg_stat_statements hash",
564 &info,
566
567 LWLockRelease(AddinShmemInitLock);
568
569 /*
570 * If we're in the postmaster (or a standalone backend...), set up a shmem
571 * exit hook to dump the statistics to disk.
572 */
575
576 /*
577 * Done if some other process already completed our initialization.
578 */
579 if (found)
580 return;
581
582 /*
583 * Note: we don't bother with locks here, because there should be no other
584 * processes running when this code is reached.
585 */
586
587 /* Unlink query text file possibly left over from crash */
588 unlink(PGSS_TEXT_FILE);
589
590 /* Allocate new query text temp file */
592 if (qfile == NULL)
593 goto write_error;
594
595 /*
596 * If we were told not to load old statistics, we're done. (Note we do
597 * not try to unlink any old dump file in this case. This seems a bit
598 * questionable but it's the historical behavior.)
599 */
600 if (!pgss_save)
601 {
602 FreeFile(qfile);
603 return;
604 }
605
606 /*
607 * Attempt to load old statistics from the dump file.
608 */
610 if (file == NULL)
611 {
612 if (errno != ENOENT)
613 goto read_error;
614 /* No existing persisted stats file, so we're done */
615 FreeFile(qfile);
616 return;
617 }
618
619 buffer_size = 2048;
620 buffer = (char *) palloc(buffer_size);
621
622 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
623 fread(&pgver, sizeof(uint32), 1, file) != 1 ||
624 fread(&num, sizeof(int32), 1, file) != 1)
625 goto read_error;
626
627 if (header != PGSS_FILE_HEADER ||
628 pgver != PGSS_PG_MAJOR_VERSION)
629 goto data_error;
630
631 for (i = 0; i < num; i++)
632 {
633 pgssEntry temp;
634 pgssEntry *entry;
635 Size query_offset;
636
637 if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
638 goto read_error;
639
640 /* Encoding is the only field we can easily sanity-check */
642 goto data_error;
643
644 /* Resize buffer as needed */
645 if (temp.query_len >= buffer_size)
646 {
647 buffer_size = Max(buffer_size * 2, temp.query_len + 1);
648 buffer = repalloc(buffer, buffer_size);
649 }
650
651 if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
652 goto read_error;
653
654 /* Should have a trailing null, but let's make sure */
655 buffer[temp.query_len] = '\0';
656
657 /* Skip loading "sticky" entries */
658 if (IS_STICKY(temp.counters))
659 continue;
660
661 /* Store the query text */
662 query_offset = pgss->extent;
663 if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
664 goto write_error;
665 pgss->extent += temp.query_len + 1;
666
667 /* make the hashtable entry (discards old entries if too many) */
668 entry = entry_alloc(&temp.key, query_offset, temp.query_len,
669 temp.encoding,
670 false);
671
672 /* copy in the actual stats */
673 entry->counters = temp.counters;
674 entry->stats_since = temp.stats_since;
676 }
677
678 /* Read global statistics for pg_stat_statements */
679 if (fread(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
680 goto read_error;
681
682 pfree(buffer);
683 FreeFile(file);
684 FreeFile(qfile);
685
686 /*
687 * Remove the persisted stats file so it's not included in
688 * backups/replication standbys, etc. A new file will be written on next
689 * shutdown.
690 *
691 * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
692 * because we remove that file on startup; it acts inversely to
693 * PGSS_DUMP_FILE, in that it is only supposed to be around when the
694 * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
695 * when the server is not running. Leaving the file creates no danger of
696 * a newly restored database having a spurious record of execution costs,
697 * which is what we're really concerned about here.
698 */
699 unlink(PGSS_DUMP_FILE);
700
701 return;
702
703read_error:
704 ereport(LOG,
706 errmsg("could not read file \"%s\": %m",
708 goto fail;
709data_error:
710 ereport(LOG,
711 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
712 errmsg("ignoring invalid data in file \"%s\"",
714 goto fail;
715write_error:
716 ereport(LOG,
718 errmsg("could not write file \"%s\": %m",
720fail:
721 if (buffer)
722 pfree(buffer);
723 if (file)
724 FreeFile(file);
725 if (qfile)
726 FreeFile(qfile);
727 /* If possible, throw away the bogus file; ignore any error */
728 unlink(PGSS_DUMP_FILE);
729
730 /*
731 * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
732 * server is running with pg_stat_statements enabled
733 */
734}
735
736/*
737 * shmem_shutdown hook: Dump statistics into file.
738 *
739 * Note: we don't bother with acquiring lock, because there should be no
740 * other processes running when this is called.
741 */
742static void
744{
745 FILE *file;
746 char *qbuffer = NULL;
747 Size qbuffer_size = 0;
748 HASH_SEQ_STATUS hash_seq;
749 int32 num_entries;
750 pgssEntry *entry;
751
752 /* Don't try to dump during a crash. */
753 if (code)
754 return;
755
756 /* Safety check ... shouldn't get here unless shmem is set up. */
757 if (!pgss || !pgss_hash)
758 return;
759
760 /* Don't dump if told not to. */
761 if (!pgss_save)
762 return;
763
765 if (file == NULL)
766 goto error;
767
768 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
769 goto error;
770 if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
771 goto error;
772 num_entries = hash_get_num_entries(pgss_hash);
773 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
774 goto error;
775
776 qbuffer = qtext_load_file(&qbuffer_size);
777 if (qbuffer == NULL)
778 goto error;
779
780 /*
781 * When serializing to disk, we store query texts immediately after their
782 * entry data. Any orphaned query texts are thereby excluded.
783 */
784 hash_seq_init(&hash_seq, pgss_hash);
785 while ((entry = hash_seq_search(&hash_seq)) != NULL)
786 {
787 int len = entry->query_len;
788 char *qstr = qtext_fetch(entry->query_offset, len,
789 qbuffer, qbuffer_size);
790
791 if (qstr == NULL)
792 continue; /* Ignore any entries with bogus texts */
793
794 if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
795 fwrite(qstr, 1, len + 1, file) != len + 1)
796 {
797 /* note: we assume hash_seq_term won't change errno */
798 hash_seq_term(&hash_seq);
799 goto error;
800 }
801 }
802
803 /* Dump global statistics for pg_stat_statements */
804 if (fwrite(&pgss->stats, sizeof(pgssGlobalStats), 1, file) != 1)
805 goto error;
806
807 free(qbuffer);
808 qbuffer = NULL;
809
810 if (FreeFile(file))
811 {
812 file = NULL;
813 goto error;
814 }
815
816 /*
817 * Rename file into place, so we atomically replace any old one.
818 */
820
821 /* Unlink query-texts file; it's not needed while the server is shut down */
822 unlink(PGSS_TEXT_FILE);
823
824 return;
825
826error:
827 ereport(LOG,
829 errmsg("could not write file \"%s\": %m",
830 PGSS_DUMP_FILE ".tmp")));
831 free(qbuffer);
832 if (file)
833 FreeFile(file);
834 unlink(PGSS_DUMP_FILE ".tmp");
835 unlink(PGSS_TEXT_FILE);
836}
837
838/*
839 * Post-parse-analysis hook: mark query with a queryId
840 */
841static void
843{
845 prev_post_parse_analyze_hook(pstate, query, jstate);
846
847 /* Safety check... */
849 return;
850
851 /*
852 * If it's EXECUTE, clear the queryId so that stats will accumulate for
853 * the underlying PREPARE. But don't do this if we're not tracking
854 * utility statements, to avoid messing up another extension that might be
855 * tracking them.
856 */
857 if (query->utilityStmt)
858 {
860 {
861 query->queryId = INT64CONST(0);
862 return;
863 }
864 }
865
866 /*
867 * If query jumbling was able to identify any ignorable constants, we
868 * immediately create a hash table entry for the query, so that we can
869 * record the normalized form of the query string. If there were no such
870 * constants, the normalized string would be the same as the query text
871 * anyway, so there's no need for an early entry.
872 */
873 if (jstate && jstate->clocations_count > 0)
874 pgss_store(pstate->p_sourcetext,
875 query->queryId,
876 query->stmt_location,
877 query->stmt_len,
879 0,
880 0,
881 NULL,
882 NULL,
883 NULL,
884 jstate,
885 0,
886 0,
888}
889
890/*
891 * Planner hook: forward to regular planner, but measure planning time
892 * if needed.
893 */
894static PlannedStmt *
896 const char *query_string,
897 int cursorOptions,
898 ParamListInfo boundParams,
899 ExplainState *es)
900{
901 PlannedStmt *result;
902
903 /*
904 * We can't process the query if no query_string is provided, as
905 * pgss_store needs it. We also ignore query without queryid, as it would
906 * be treated as a utility statement, which may not be the case.
907 */
909 && pgss_track_planning && query_string
910 && parse->queryId != INT64CONST(0))
911 {
914 BufferUsage bufusage_start,
915 bufusage;
916 WalUsage walusage_start,
917 walusage;
918
919 /* We need to track buffer usage as the planner can access them. */
920 bufusage_start = pgBufferUsage;
921
922 /*
923 * Similarly the planner could write some WAL records in some cases
924 * (e.g. setting a hint bit with those being WAL-logged)
925 */
926 walusage_start = pgWalUsage;
928
930 PG_TRY();
931 {
933 result = prev_planner_hook(parse, query_string, cursorOptions,
934 boundParams, es);
935 else
936 result = standard_planner(parse, query_string, cursorOptions,
937 boundParams, es);
938 }
939 PG_FINALLY();
940 {
942 }
943 PG_END_TRY();
944
947
948 /* calc differences of buffer counters. */
949 memset(&bufusage, 0, sizeof(BufferUsage));
950 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
951
952 /* calc differences of WAL counters. */
953 memset(&walusage, 0, sizeof(WalUsage));
954 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
955
956 pgss_store(query_string,
957 parse->queryId,
958 parse->stmt_location,
959 parse->stmt_len,
960 PGSS_PLAN,
962 0,
963 &bufusage,
964 &walusage,
965 NULL,
966 NULL,
967 0,
968 0,
969 result->planOrigin);
970 }
971 else
972 {
973 /*
974 * Even though we're not tracking plan time for this statement, we
975 * must still increment the nesting level, to ensure that functions
976 * evaluated during planning are not seen as top-level calls.
977 */
979 PG_TRY();
980 {
982 result = prev_planner_hook(parse, query_string, cursorOptions,
983 boundParams, es);
984 else
985 result = standard_planner(parse, query_string, cursorOptions,
986 boundParams, es);
987 }
988 PG_FINALLY();
989 {
991 }
992 PG_END_TRY();
993 }
994
995 return result;
996}
997
998/*
999 * ExecutorStart hook: start up tracking if needed
1000 */
1001static void
1002pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
1003{
1005 prev_ExecutorStart(queryDesc, eflags);
1006 else
1007 standard_ExecutorStart(queryDesc, eflags);
1008
1009 /*
1010 * If query has queryId zero, don't track it. This prevents double
1011 * counting of optimizable statements that are directly contained in
1012 * utility statements.
1013 */
1014 if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != INT64CONST(0))
1015 {
1016 /*
1017 * Set up to track total elapsed time in ExecutorRun. Make sure the
1018 * space is allocated in the per-query context so it will go away at
1019 * ExecutorEnd.
1020 */
1021 if (queryDesc->totaltime == NULL)
1022 {
1023 MemoryContext oldcxt;
1024
1025 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
1026 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL, false);
1027 MemoryContextSwitchTo(oldcxt);
1028 }
1029 }
1030}
1031
1032/*
1033 * ExecutorRun hook: all we need do is track nesting depth
1034 */
1035static void
1036pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
1037{
1038 nesting_level++;
1039 PG_TRY();
1040 {
1041 if (prev_ExecutorRun)
1042 prev_ExecutorRun(queryDesc, direction, count);
1043 else
1044 standard_ExecutorRun(queryDesc, direction, count);
1045 }
1046 PG_FINALLY();
1047 {
1048 nesting_level--;
1049 }
1050 PG_END_TRY();
1051}
1052
1053/*
1054 * ExecutorFinish hook: all we need do is track nesting depth
1055 */
1056static void
1058{
1059 nesting_level++;
1060 PG_TRY();
1061 {
1063 prev_ExecutorFinish(queryDesc);
1064 else
1065 standard_ExecutorFinish(queryDesc);
1066 }
1067 PG_FINALLY();
1068 {
1069 nesting_level--;
1070 }
1071 PG_END_TRY();
1072}
1073
1074/*
1075 * ExecutorEnd hook: store results if needed
1076 */
1077static void
1079{
1080 int64 queryId = queryDesc->plannedstmt->queryId;
1081
1082 if (queryId != INT64CONST(0) && queryDesc->totaltime &&
1084 {
1085 /*
1086 * Make sure stats accumulation is done. (Note: it's okay if several
1087 * levels of hook all do this.)
1088 */
1089 InstrEndLoop(queryDesc->totaltime);
1090
1091 pgss_store(queryDesc->sourceText,
1092 queryId,
1093 queryDesc->plannedstmt->stmt_location,
1094 queryDesc->plannedstmt->stmt_len,
1095 PGSS_EXEC,
1096 queryDesc->totaltime->total * 1000.0, /* convert to msec */
1097 queryDesc->estate->es_total_processed,
1098 &queryDesc->totaltime->bufusage,
1099 &queryDesc->totaltime->walusage,
1100 queryDesc->estate->es_jit ? &queryDesc->estate->es_jit->instr : NULL,
1101 NULL,
1104 queryDesc->plannedstmt->planOrigin);
1105 }
1106
1107 if (prev_ExecutorEnd)
1108 prev_ExecutorEnd(queryDesc);
1109 else
1110 standard_ExecutorEnd(queryDesc);
1111}
1112
1113/*
1114 * ProcessUtility hook
1115 */
1116static void
1117pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
1118 bool readOnlyTree,
1119 ProcessUtilityContext context,
1120 ParamListInfo params, QueryEnvironment *queryEnv,
1122{
1123 Node *parsetree = pstmt->utilityStmt;
1124 int64 saved_queryId = pstmt->queryId;
1125 int saved_stmt_location = pstmt->stmt_location;
1126 int saved_stmt_len = pstmt->stmt_len;
1128
1129 /*
1130 * Force utility statements to get queryId zero. We do this even in cases
1131 * where the statement contains an optimizable statement for which a
1132 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR). For such
1133 * cases, runtime control will first go through ProcessUtility and then
1134 * the executor, and we don't want the executor hooks to do anything,
1135 * since we are already measuring the statement's costs at the utility
1136 * level.
1137 *
1138 * Note that this is only done if pg_stat_statements is enabled and
1139 * configured to track utility statements, in the unlikely possibility
1140 * that user configured another extension to handle utility statements
1141 * only.
1142 */
1143 if (enabled)
1144 pstmt->queryId = INT64CONST(0);
1145
1146 /*
1147 * If it's an EXECUTE statement, we don't track it and don't increment the
1148 * nesting level. This allows the cycles to be charged to the underlying
1149 * PREPARE instead (by the Executor hooks), which is much more useful.
1150 *
1151 * We also don't track execution of PREPARE. If we did, we would get one
1152 * hash table entry for the PREPARE (with hash calculated from the query
1153 * string), and then a different one with the same query string (but hash
1154 * calculated from the query tree) would be used to accumulate costs of
1155 * ensuing EXECUTEs. This would be confusing. Since PREPARE doesn't
1156 * actually run the planner (only parse+rewrite), its costs are generally
1157 * pretty negligible and it seems okay to just ignore it.
1158 */
1159 if (enabled &&
1160 !IsA(parsetree, ExecuteStmt) &&
1161 !IsA(parsetree, PrepareStmt))
1162 {
1165 uint64 rows;
1166 BufferUsage bufusage_start,
1167 bufusage;
1168 WalUsage walusage_start,
1169 walusage;
1170
1171 bufusage_start = pgBufferUsage;
1172 walusage_start = pgWalUsage;
1174
1175 nesting_level++;
1176 PG_TRY();
1177 {
1179 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1180 context, params, queryEnv,
1181 dest, qc);
1182 else
1183 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1184 context, params, queryEnv,
1185 dest, qc);
1186 }
1187 PG_FINALLY();
1188 {
1189 nesting_level--;
1190 }
1191 PG_END_TRY();
1192
1193 /*
1194 * CAUTION: do not access the *pstmt data structure again below here.
1195 * If it was a ROLLBACK or similar, that data structure may have been
1196 * freed. We must copy everything we still need into local variables,
1197 * which we did above.
1198 *
1199 * For the same reason, we can't risk restoring pstmt->queryId to its
1200 * former value, which'd otherwise be a good idea.
1201 */
1202
1205
1206 /*
1207 * Track the total number of rows retrieved or affected by the utility
1208 * statements of COPY, FETCH, CREATE TABLE AS, CREATE MATERIALIZED
1209 * VIEW, REFRESH MATERIALIZED VIEW and SELECT INTO.
1210 */
1211 rows = (qc && (qc->commandTag == CMDTAG_COPY ||
1212 qc->commandTag == CMDTAG_FETCH ||
1213 qc->commandTag == CMDTAG_SELECT ||
1214 qc->commandTag == CMDTAG_REFRESH_MATERIALIZED_VIEW)) ?
1215 qc->nprocessed : 0;
1216
1217 /* calc differences of buffer counters. */
1218 memset(&bufusage, 0, sizeof(BufferUsage));
1219 BufferUsageAccumDiff(&bufusage, &pgBufferUsage, &bufusage_start);
1220
1221 /* calc differences of WAL counters. */
1222 memset(&walusage, 0, sizeof(WalUsage));
1223 WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
1224
1225 pgss_store(queryString,
1226 saved_queryId,
1227 saved_stmt_location,
1228 saved_stmt_len,
1229 PGSS_EXEC,
1231 rows,
1232 &bufusage,
1233 &walusage,
1234 NULL,
1235 NULL,
1236 0,
1237 0,
1238 pstmt->planOrigin);
1239 }
1240 else
1241 {
1242 /*
1243 * Even though we're not tracking execution time for this statement,
1244 * we must still increment the nesting level, to ensure that functions
1245 * evaluated within it are not seen as top-level calls. But don't do
1246 * so for EXECUTE; that way, when control reaches pgss_planner or
1247 * pgss_ExecutorStart, we will treat the costs as top-level if
1248 * appropriate. Likewise, don't bump for PREPARE, so that parse
1249 * analysis will treat the statement as top-level if appropriate.
1250 *
1251 * To be absolutely certain we don't mess up the nesting level,
1252 * evaluate the bump_level condition just once.
1253 */
1254 bool bump_level =
1255 !IsA(parsetree, ExecuteStmt) &&
1256 !IsA(parsetree, PrepareStmt);
1257
1258 if (bump_level)
1259 nesting_level++;
1260 PG_TRY();
1261 {
1263 prev_ProcessUtility(pstmt, queryString, readOnlyTree,
1264 context, params, queryEnv,
1265 dest, qc);
1266 else
1267 standard_ProcessUtility(pstmt, queryString, readOnlyTree,
1268 context, params, queryEnv,
1269 dest, qc);
1270 }
1271 PG_FINALLY();
1272 {
1273 if (bump_level)
1274 nesting_level--;
1275 }
1276 PG_END_TRY();
1277 }
1278}
1279
1280/*
1281 * Store some statistics for a statement.
1282 *
1283 * If jstate is not NULL then we're trying to create an entry for which
1284 * we have no statistics as yet; we just want to record the normalized
1285 * query string. total_time, rows, bufusage and walusage are ignored in this
1286 * case.
1287 *
1288 * If kind is PGSS_PLAN or PGSS_EXEC, its value is used as the array position
1289 * for the arrays in the Counters field.
1290 */
1291static void
1292pgss_store(const char *query, int64 queryId,
1293 int query_location, int query_len,
1294 pgssStoreKind kind,
1295 double total_time, uint64 rows,
1296 const BufferUsage *bufusage,
1297 const WalUsage *walusage,
1298 const struct JitInstrumentation *jitusage,
1299 JumbleState *jstate,
1300 int parallel_workers_to_launch,
1301 int parallel_workers_launched,
1302 PlannedStmtOrigin planOrigin)
1303{
1305 pgssEntry *entry;
1306 char *norm_query = NULL;
1308
1309 Assert(query != NULL);
1310
1311 /* Safety check... */
1312 if (!pgss || !pgss_hash)
1313 return;
1314
1315 /*
1316 * Nothing to do if compute_query_id isn't enabled and no other module
1317 * computed a query identifier.
1318 */
1319 if (queryId == INT64CONST(0))
1320 return;
1321
1322 /*
1323 * Confine our attention to the relevant part of the string, if the query
1324 * is a portion of a multi-statement source string, and update query
1325 * location and length if needed.
1326 */
1327 query = CleanQuerytext(query, &query_location, &query_len);
1328
1329 /* Set up key for hashtable search */
1330
1331 /* clear padding */
1332 memset(&key, 0, sizeof(pgssHashKey));
1333
1334 key.userid = GetUserId();
1335 key.dbid = MyDatabaseId;
1336 key.queryid = queryId;
1337 key.toplevel = (nesting_level == 0);
1338
1339 /* Lookup the hash table entry with shared lock. */
1341
1342 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
1343
1344 /* Create new entry, if not present */
1345 if (!entry)
1346 {
1347 Size query_offset;
1348 int gc_count;
1349 bool stored;
1350 bool do_gc;
1351
1352 /*
1353 * Create a new, normalized query string if caller asked. We don't
1354 * need to hold the lock while doing this work. (Note: in any case,
1355 * it's possible that someone else creates a duplicate hashtable entry
1356 * in the interval where we don't hold the lock below. That case is
1357 * handled by entry_alloc.)
1358 */
1359 if (jstate)
1360 {
1362 norm_query = generate_normalized_query(jstate, query,
1363 query_location,
1364 &query_len);
1366 }
1367
1368 /* Append new query text to file with only shared lock held */
1369 stored = qtext_store(norm_query ? norm_query : query, query_len,
1370 &query_offset, &gc_count);
1371
1372 /*
1373 * Determine whether we need to garbage collect external query texts
1374 * while the shared lock is still held. This micro-optimization
1375 * avoids taking the time to decide this while holding exclusive lock.
1376 */
1377 do_gc = need_gc_qtexts();
1378
1379 /* Need exclusive lock to make a new hashtable entry - promote */
1382
1383 /*
1384 * A garbage collection may have occurred while we weren't holding the
1385 * lock. In the unlikely event that this happens, the query text we
1386 * stored above will have been garbage collected, so write it again.
1387 * This should be infrequent enough that doing it while holding
1388 * exclusive lock isn't a performance problem.
1389 */
1390 if (!stored || pgss->gc_count != gc_count)
1391 stored = qtext_store(norm_query ? norm_query : query, query_len,
1392 &query_offset, NULL);
1393
1394 /* If we failed to write to the text file, give up */
1395 if (!stored)
1396 goto done;
1397
1398 /* OK to create a new hashtable entry */
1399 entry = entry_alloc(&key, query_offset, query_len, encoding,
1400 jstate != NULL);
1401
1402 /* If needed, perform garbage collection while exclusive lock held */
1403 if (do_gc)
1404 gc_qtexts();
1405 }
1406
1407 /* Increment the counts, except when jstate is not NULL */
1408 if (!jstate)
1409 {
1410 Assert(kind == PGSS_PLAN || kind == PGSS_EXEC);
1411
1412 /*
1413 * Grab the spinlock while updating the counters (see comment about
1414 * locking rules at the head of the file)
1415 */
1416 SpinLockAcquire(&entry->mutex);
1417
1418 /* "Unstick" entry if it was previously sticky */
1419 if (IS_STICKY(entry->counters))
1420 entry->counters.usage = USAGE_INIT;
1421
1422 entry->counters.calls[kind] += 1;
1423 entry->counters.total_time[kind] += total_time;
1424
1425 if (entry->counters.calls[kind] == 1)
1426 {
1427 entry->counters.min_time[kind] = total_time;
1428 entry->counters.max_time[kind] = total_time;
1429 entry->counters.mean_time[kind] = total_time;
1430 }
1431 else
1432 {
1433 /*
1434 * Welford's method for accurately computing variance. See
1435 * <http://www.johndcook.com/blog/standard_deviation/>
1436 */
1437 double old_mean = entry->counters.mean_time[kind];
1438
1439 entry->counters.mean_time[kind] +=
1440 (total_time - old_mean) / entry->counters.calls[kind];
1441 entry->counters.sum_var_time[kind] +=
1442 (total_time - old_mean) * (total_time - entry->counters.mean_time[kind]);
1443
1444 /*
1445 * Calculate min and max time. min = 0 and max = 0 means that the
1446 * min/max statistics were reset
1447 */
1448 if (entry->counters.min_time[kind] == 0
1449 && entry->counters.max_time[kind] == 0)
1450 {
1451 entry->counters.min_time[kind] = total_time;
1452 entry->counters.max_time[kind] = total_time;
1453 }
1454 else
1455 {
1456 if (entry->counters.min_time[kind] > total_time)
1457 entry->counters.min_time[kind] = total_time;
1458 if (entry->counters.max_time[kind] < total_time)
1459 entry->counters.max_time[kind] = total_time;
1460 }
1461 }
1462 entry->counters.rows += rows;
1463 entry->counters.shared_blks_hit += bufusage->shared_blks_hit;
1464 entry->counters.shared_blks_read += bufusage->shared_blks_read;
1467 entry->counters.local_blks_hit += bufusage->local_blks_hit;
1468 entry->counters.local_blks_read += bufusage->local_blks_read;
1471 entry->counters.temp_blks_read += bufusage->temp_blks_read;
1472 entry->counters.temp_blks_written += bufusage->temp_blks_written;
1479 entry->counters.usage += USAGE_EXEC(total_time);
1480 entry->counters.wal_records += walusage->wal_records;
1481 entry->counters.wal_fpi += walusage->wal_fpi;
1482 entry->counters.wal_bytes += walusage->wal_bytes;
1483 entry->counters.wal_buffers_full += walusage->wal_buffers_full;
1484 if (jitusage)
1485 {
1486 entry->counters.jit_functions += jitusage->created_functions;
1488
1490 entry->counters.jit_deform_count++;
1492
1496
1500
1504 }
1505
1506 /* parallel worker counters */
1507 entry->counters.parallel_workers_to_launch += parallel_workers_to_launch;
1508 entry->counters.parallel_workers_launched += parallel_workers_launched;
1509
1510 /* plan cache counters */
1511 if (planOrigin == PLAN_STMT_CACHE_GENERIC)
1513 else if (planOrigin == PLAN_STMT_CACHE_CUSTOM)
1514 entry->counters.custom_plan_calls++;
1515
1516 SpinLockRelease(&entry->mutex);
1517 }
1518
1519done:
1521
1522 /* We postpone this clean-up until we're out of the lock */
1523 if (norm_query)
1524 pfree(norm_query);
1525}
1526
1527/*
1528 * Reset statement statistics corresponding to userid, dbid, and queryid.
1529 */
1530Datum
1532{
1533 Oid userid;
1534 Oid dbid;
1535 int64 queryid;
1536
1537 userid = PG_GETARG_OID(0);
1538 dbid = PG_GETARG_OID(1);
1539 queryid = PG_GETARG_INT64(2);
1540
1541 entry_reset(userid, dbid, queryid, false);
1542
1544}
1545
1546Datum
1548{
1549 Oid userid;
1550 Oid dbid;
1551 int64 queryid;
1552 bool minmax_only;
1553
1554 userid = PG_GETARG_OID(0);
1555 dbid = PG_GETARG_OID(1);
1556 queryid = PG_GETARG_INT64(2);
1557 minmax_only = PG_GETARG_BOOL(3);
1558
1559 PG_RETURN_TIMESTAMPTZ(entry_reset(userid, dbid, queryid, minmax_only));
1560}
1561
1562/*
1563 * Reset statement statistics.
1564 */
1565Datum
1567{
1568 entry_reset(0, 0, 0, false);
1569
1571}
1572
1573/* Number of output arguments (columns) for various API versions */
1574#define PG_STAT_STATEMENTS_COLS_V1_0 14
1575#define PG_STAT_STATEMENTS_COLS_V1_1 18
1576#define PG_STAT_STATEMENTS_COLS_V1_2 19
1577#define PG_STAT_STATEMENTS_COLS_V1_3 23
1578#define PG_STAT_STATEMENTS_COLS_V1_8 32
1579#define PG_STAT_STATEMENTS_COLS_V1_9 33
1580#define PG_STAT_STATEMENTS_COLS_V1_10 43
1581#define PG_STAT_STATEMENTS_COLS_V1_11 49
1582#define PG_STAT_STATEMENTS_COLS_V1_12 52
1583#define PG_STAT_STATEMENTS_COLS_V1_13 54
1584#define PG_STAT_STATEMENTS_COLS 54 /* maximum of above */
1585
1586/*
1587 * Retrieve statement statistics.
1588 *
1589 * The SQL API of this function has changed multiple times, and will likely
1590 * do so again in future. To support the case where a newer version of this
1591 * loadable module is being used with an old SQL declaration of the function,
1592 * we continue to support the older API versions. For 1.2 and later, the
1593 * expected API version is identified by embedding it in the C name of the
1594 * function. Unfortunately we weren't bright enough to do that for 1.1.
1595 */
1596Datum
1598{
1599 bool showtext = PG_GETARG_BOOL(0);
1600
1601 pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
1602
1603 return (Datum) 0;
1604}
1605
1606Datum
1608{
1609 bool showtext = PG_GETARG_BOOL(0);
1610
1611 pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
1612
1613 return (Datum) 0;
1614}
1615
1616Datum
1618{
1619 bool showtext = PG_GETARG_BOOL(0);
1620
1621 pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
1622
1623 return (Datum) 0;
1624}
1625
1626Datum
1628{
1629 bool showtext = PG_GETARG_BOOL(0);
1630
1631 pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
1632
1633 return (Datum) 0;
1634}
1635
1636Datum
1638{
1639 bool showtext = PG_GETARG_BOOL(0);
1640
1641 pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
1642
1643 return (Datum) 0;
1644}
1645
1646Datum
1648{
1649 bool showtext = PG_GETARG_BOOL(0);
1650
1651 pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
1652
1653 return (Datum) 0;
1654}
1655
1656Datum
1658{
1659 bool showtext = PG_GETARG_BOOL(0);
1660
1661 pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
1662
1663 return (Datum) 0;
1664}
1665
1666Datum
1668{
1669 bool showtext = PG_GETARG_BOOL(0);
1670
1671 pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
1672
1673 return (Datum) 0;
1674}
1675
1676/*
1677 * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
1678 * This can be removed someday, perhaps.
1679 */
1680Datum
1682{
1683 /* If it's really API 1.1, we'll figure that out below */
1685
1686 return (Datum) 0;
1687}
1688
1689/* Common code for all versions of pg_stat_statements() */
1690static void
1692 pgssVersion api_version,
1693 bool showtext)
1694{
1695 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1696 Oid userid = GetUserId();
1697 bool is_allowed_role = false;
1698 char *qbuffer = NULL;
1699 Size qbuffer_size = 0;
1700 Size extent = 0;
1701 int gc_count = 0;
1702 HASH_SEQ_STATUS hash_seq;
1703 pgssEntry *entry;
1704
1705 /*
1706 * Superusers or roles with the privileges of pg_read_all_stats members
1707 * are allowed
1708 */
1709 is_allowed_role = has_privs_of_role(userid, ROLE_PG_READ_ALL_STATS);
1710
1711 /* hash table must exist already */
1712 if (!pgss || !pgss_hash)
1713 ereport(ERROR,
1714 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1715 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
1716
1717 InitMaterializedSRF(fcinfo, 0);
1718
1719 /*
1720 * Check we have the expected number of output arguments. Aside from
1721 * being a good safety check, we need a kluge here to detect API version
1722 * 1.1, which was wedged into the code in an ill-considered way.
1723 */
1724 switch (rsinfo->setDesc->natts)
1725 {
1727 if (api_version != PGSS_V1_0)
1728 elog(ERROR, "incorrect number of output arguments");
1729 break;
1731 /* pg_stat_statements() should have told us 1.0 */
1732 if (api_version != PGSS_V1_0)
1733 elog(ERROR, "incorrect number of output arguments");
1734 api_version = PGSS_V1_1;
1735 break;
1737 if (api_version != PGSS_V1_2)
1738 elog(ERROR, "incorrect number of output arguments");
1739 break;
1741 if (api_version != PGSS_V1_3)
1742 elog(ERROR, "incorrect number of output arguments");
1743 break;
1745 if (api_version != PGSS_V1_8)
1746 elog(ERROR, "incorrect number of output arguments");
1747 break;
1749 if (api_version != PGSS_V1_9)
1750 elog(ERROR, "incorrect number of output arguments");
1751 break;
1753 if (api_version != PGSS_V1_10)
1754 elog(ERROR, "incorrect number of output arguments");
1755 break;
1757 if (api_version != PGSS_V1_11)
1758 elog(ERROR, "incorrect number of output arguments");
1759 break;
1761 if (api_version != PGSS_V1_12)
1762 elog(ERROR, "incorrect number of output arguments");
1763 break;
1765 if (api_version != PGSS_V1_13)
1766 elog(ERROR, "incorrect number of output arguments");
1767 break;
1768 default:
1769 elog(ERROR, "incorrect number of output arguments");
1770 }
1771
1772 /*
1773 * We'd like to load the query text file (if needed) while not holding any
1774 * lock on pgss->lock. In the worst case we'll have to do this again
1775 * after we have the lock, but it's unlikely enough to make this a win
1776 * despite occasional duplicated work. We need to reload if anybody
1777 * writes to the file (either a retail qtext_store(), or a garbage
1778 * collection) between this point and where we've gotten shared lock. If
1779 * a qtext_store is actually in progress when we look, we might as well
1780 * skip the speculative load entirely.
1781 */
1782 if (showtext)
1783 {
1784 int n_writers;
1785
1786 /* Take the mutex so we can examine variables */
1788 extent = pgss->extent;
1789 n_writers = pgss->n_writers;
1790 gc_count = pgss->gc_count;
1792
1793 /* No point in loading file now if there are active writers */
1794 if (n_writers == 0)
1795 qbuffer = qtext_load_file(&qbuffer_size);
1796 }
1797
1798 /*
1799 * Get shared lock, load or reload the query text file if we must, and
1800 * iterate over the hashtable entries.
1801 *
1802 * With a large hash table, we might be holding the lock rather longer
1803 * than one could wish. However, this only blocks creation of new hash
1804 * table entries, and the larger the hash table the less likely that is to
1805 * be needed. So we can hope this is okay. Perhaps someday we'll decide
1806 * we need to partition the hash table to limit the time spent holding any
1807 * one lock.
1808 */
1810
1811 if (showtext)
1812 {
1813 /*
1814 * Here it is safe to examine extent and gc_count without taking the
1815 * mutex. Note that although other processes might change
1816 * pgss->extent just after we look at it, the strings they then write
1817 * into the file cannot yet be referenced in the hashtable, so we
1818 * don't care whether we see them or not.
1819 *
1820 * If qtext_load_file fails, we just press on; we'll return NULL for
1821 * every query text.
1822 */
1823 if (qbuffer == NULL ||
1824 pgss->extent != extent ||
1825 pgss->gc_count != gc_count)
1826 {
1827 free(qbuffer);
1828 qbuffer = qtext_load_file(&qbuffer_size);
1829 }
1830 }
1831
1832 hash_seq_init(&hash_seq, pgss_hash);
1833 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1834 {
1836 bool nulls[PG_STAT_STATEMENTS_COLS];
1837 int i = 0;
1838 Counters tmp;
1839 double stddev;
1840 int64 queryid = entry->key.queryid;
1841 TimestampTz stats_since;
1842 TimestampTz minmax_stats_since;
1843
1844 memset(values, 0, sizeof(values));
1845 memset(nulls, 0, sizeof(nulls));
1846
1847 values[i++] = ObjectIdGetDatum(entry->key.userid);
1848 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1849 if (api_version >= PGSS_V1_9)
1850 values[i++] = BoolGetDatum(entry->key.toplevel);
1851
1852 if (is_allowed_role || entry->key.userid == userid)
1853 {
1854 if (api_version >= PGSS_V1_2)
1855 values[i++] = Int64GetDatumFast(queryid);
1856
1857 if (showtext)
1858 {
1859 char *qstr = qtext_fetch(entry->query_offset,
1860 entry->query_len,
1861 qbuffer,
1862 qbuffer_size);
1863
1864 if (qstr)
1865 {
1866 char *enc;
1867
1868 enc = pg_any_to_server(qstr,
1869 entry->query_len,
1870 entry->encoding);
1871
1873
1874 if (enc != qstr)
1875 pfree(enc);
1876 }
1877 else
1878 {
1879 /* Just return a null if we fail to find the text */
1880 nulls[i++] = true;
1881 }
1882 }
1883 else
1884 {
1885 /* Query text not requested */
1886 nulls[i++] = true;
1887 }
1888 }
1889 else
1890 {
1891 /* Don't show queryid */
1892 if (api_version >= PGSS_V1_2)
1893 nulls[i++] = true;
1894
1895 /*
1896 * Don't show query text, but hint as to the reason for not doing
1897 * so if it was requested
1898 */
1899 if (showtext)
1900 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1901 else
1902 nulls[i++] = true;
1903 }
1904
1905 /* copy counters to a local variable to keep locking time short */
1906 SpinLockAcquire(&entry->mutex);
1907 tmp = entry->counters;
1908 SpinLockRelease(&entry->mutex);
1909
1910 /*
1911 * The spinlock is not required when reading these two as they are
1912 * always updated when holding pgss->lock exclusively.
1913 */
1914 stats_since = entry->stats_since;
1915 minmax_stats_since = entry->minmax_stats_since;
1916
1917 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1918 if (IS_STICKY(tmp))
1919 continue;
1920
1921 /* Note that we rely on PGSS_PLAN being 0 and PGSS_EXEC being 1. */
1922 for (int kind = 0; kind < PGSS_NUMKIND; kind++)
1923 {
1924 if (kind == PGSS_EXEC || api_version >= PGSS_V1_8)
1925 {
1926 values[i++] = Int64GetDatumFast(tmp.calls[kind]);
1927 values[i++] = Float8GetDatumFast(tmp.total_time[kind]);
1928 }
1929
1930 if ((kind == PGSS_EXEC && api_version >= PGSS_V1_3) ||
1931 api_version >= PGSS_V1_8)
1932 {
1933 values[i++] = Float8GetDatumFast(tmp.min_time[kind]);
1934 values[i++] = Float8GetDatumFast(tmp.max_time[kind]);
1935 values[i++] = Float8GetDatumFast(tmp.mean_time[kind]);
1936
1937 /*
1938 * Note we are calculating the population variance here, not
1939 * the sample variance, as we have data for the whole
1940 * population, so Bessel's correction is not used, and we
1941 * don't divide by tmp.calls - 1.
1942 */
1943 if (tmp.calls[kind] > 1)
1944 stddev = sqrt(tmp.sum_var_time[kind] / tmp.calls[kind]);
1945 else
1946 stddev = 0.0;
1947 values[i++] = Float8GetDatumFast(stddev);
1948 }
1949 }
1950 values[i++] = Int64GetDatumFast(tmp.rows);
1953 if (api_version >= PGSS_V1_1)
1958 if (api_version >= PGSS_V1_1)
1963 if (api_version >= PGSS_V1_1)
1964 {
1967 }
1968 if (api_version >= PGSS_V1_11)
1969 {
1972 }
1973 if (api_version >= PGSS_V1_10)
1974 {
1977 }
1978 if (api_version >= PGSS_V1_8)
1979 {
1980 char buf[256];
1981 Datum wal_bytes;
1982
1985
1986 snprintf(buf, sizeof buf, UINT64_FORMAT, tmp.wal_bytes);
1987
1988 /* Convert to numeric. */
1989 wal_bytes = DirectFunctionCall3(numeric_in,
1992 Int32GetDatum(-1));
1993 values[i++] = wal_bytes;
1994 }
1995 if (api_version >= PGSS_V1_12)
1996 {
1998 }
1999 if (api_version >= PGSS_V1_10)
2000 {
2009 }
2010 if (api_version >= PGSS_V1_11)
2011 {
2014 }
2015 if (api_version >= PGSS_V1_12)
2016 {
2019 }
2020 if (api_version >= PGSS_V1_13)
2021 {
2024 }
2025 if (api_version >= PGSS_V1_11)
2026 {
2027 values[i++] = TimestampTzGetDatum(stats_since);
2028 values[i++] = TimestampTzGetDatum(minmax_stats_since);
2029 }
2030
2031 Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
2032 api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
2033 api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
2034 api_version == PGSS_V1_3 ? PG_STAT_STATEMENTS_COLS_V1_3 :
2035 api_version == PGSS_V1_8 ? PG_STAT_STATEMENTS_COLS_V1_8 :
2036 api_version == PGSS_V1_9 ? PG_STAT_STATEMENTS_COLS_V1_9 :
2037 api_version == PGSS_V1_10 ? PG_STAT_STATEMENTS_COLS_V1_10 :
2038 api_version == PGSS_V1_11 ? PG_STAT_STATEMENTS_COLS_V1_11 :
2039 api_version == PGSS_V1_12 ? PG_STAT_STATEMENTS_COLS_V1_12 :
2040 api_version == PGSS_V1_13 ? PG_STAT_STATEMENTS_COLS_V1_13 :
2041 -1 /* fail if you forget to update this assert */ ));
2042
2043 tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
2044 }
2045
2047
2048 free(qbuffer);
2049}
2050
2051/* Number of output arguments (columns) for pg_stat_statements_info */
2052#define PG_STAT_STATEMENTS_INFO_COLS 2
2053
2054/*
2055 * Return statistics of pg_stat_statements.
2056 */
2057Datum
2059{
2060 pgssGlobalStats stats;
2061 TupleDesc tupdesc;
2063 bool nulls[PG_STAT_STATEMENTS_INFO_COLS] = {0};
2064
2065 if (!pgss || !pgss_hash)
2066 ereport(ERROR,
2067 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2068 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2069
2070 /* Build a tuple descriptor for our result type */
2071 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
2072 elog(ERROR, "return type must be a row type");
2073
2074 /* Read global statistics for pg_stat_statements */
2076 stats = pgss->stats;
2078
2079 values[0] = Int64GetDatum(stats.dealloc);
2081
2083}
2084
2085/*
2086 * Estimate shared memory space needed.
2087 */
2088static Size
2090{
2091 Size size;
2092
2093 size = MAXALIGN(sizeof(pgssSharedState));
2094 size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
2095
2096 return size;
2097}
2098
2099/*
2100 * Allocate a new hashtable entry.
2101 * caller must hold an exclusive lock on pgss->lock
2102 *
2103 * "query" need not be null-terminated; we rely on query_len instead
2104 *
2105 * If "sticky" is true, make the new entry artificially sticky so that it will
2106 * probably still be there when the query finishes execution. We do this by
2107 * giving it a median usage value rather than the normal value. (Strictly
2108 * speaking, query strings are normalized on a best effort basis, though it
2109 * would be difficult to demonstrate this even under artificial conditions.)
2110 *
2111 * Note: despite needing exclusive lock, it's not an error for the target
2112 * entry to already exist. This is because pgss_store releases and
2113 * reacquires lock after failing to find a match; so someone else could
2114 * have made the entry while we waited to get exclusive lock.
2115 */
2116static pgssEntry *
2117entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
2118 bool sticky)
2119{
2120 pgssEntry *entry;
2121 bool found;
2122
2123 /* Make space if needed */
2125 entry_dealloc();
2126
2127 /* Find or create an entry with desired hash code */
2128 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
2129
2130 if (!found)
2131 {
2132 /* New entry, initialize it */
2133
2134 /* reset the statistics */
2135 memset(&entry->counters, 0, sizeof(Counters));
2136 /* set the appropriate initial usage count */
2137 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
2138 /* re-initialize the mutex each time ... we assume no one using it */
2139 SpinLockInit(&entry->mutex);
2140 /* ... and don't forget the query text metadata */
2141 Assert(query_len >= 0);
2142 entry->query_offset = query_offset;
2143 entry->query_len = query_len;
2144 entry->encoding = encoding;
2146 entry->minmax_stats_since = entry->stats_since;
2147 }
2148
2149 return entry;
2150}
2151
2152/*
2153 * qsort comparator for sorting into increasing usage order
2154 */
2155static int
2156entry_cmp(const void *lhs, const void *rhs)
2157{
2158 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
2159 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
2160
2161 if (l_usage < r_usage)
2162 return -1;
2163 else if (l_usage > r_usage)
2164 return +1;
2165 else
2166 return 0;
2167}
2168
2169/*
2170 * Deallocate least-used entries.
2171 *
2172 * Caller must hold an exclusive lock on pgss->lock.
2173 */
2174static void
2176{
2177 HASH_SEQ_STATUS hash_seq;
2178 pgssEntry **entries;
2179 pgssEntry *entry;
2180 int nvictims;
2181 int i;
2182 Size tottextlen;
2183 int nvalidtexts;
2184
2185 /*
2186 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
2187 * While we're scanning the table, apply the decay factor to the usage
2188 * values, and update the mean query length.
2189 *
2190 * Note that the mean query length is almost immediately obsolete, since
2191 * we compute it before not after discarding the least-used entries.
2192 * Hopefully, that doesn't affect the mean too much; it doesn't seem worth
2193 * making two passes to get a more current result. Likewise, the new
2194 * cur_median_usage includes the entries we're about to zap.
2195 */
2196
2197 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
2198
2199 i = 0;
2200 tottextlen = 0;
2201 nvalidtexts = 0;
2202
2203 hash_seq_init(&hash_seq, pgss_hash);
2204 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2205 {
2206 entries[i++] = entry;
2207 /* "Sticky" entries get a different usage decay rate. */
2208 if (IS_STICKY(entry->counters))
2210 else
2212 /* In the mean length computation, ignore dropped texts. */
2213 if (entry->query_len >= 0)
2214 {
2215 tottextlen += entry->query_len + 1;
2216 nvalidtexts++;
2217 }
2218 }
2219
2220 /* Sort into increasing order by usage */
2221 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
2222
2223 /* Record the (approximate) median usage */
2224 if (i > 0)
2225 pgss->cur_median_usage = entries[i / 2]->counters.usage;
2226 /* Record the mean query length */
2227 if (nvalidtexts > 0)
2228 pgss->mean_query_len = tottextlen / nvalidtexts;
2229 else
2231
2232 /* Now zap an appropriate fraction of lowest-usage entries */
2233 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
2234 nvictims = Min(nvictims, i);
2235
2236 for (i = 0; i < nvictims; i++)
2237 {
2238 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
2239 }
2240
2241 pfree(entries);
2242
2243 /* Increment the number of times entries are deallocated */
2245 pgss->stats.dealloc += 1;
2247}
2248
2249/*
2250 * Given a query string (not necessarily null-terminated), allocate a new
2251 * entry in the external query text file and store the string there.
2252 *
2253 * If successful, returns true, and stores the new entry's offset in the file
2254 * into *query_offset. Also, if gc_count isn't NULL, *gc_count is set to the
2255 * number of garbage collections that have occurred so far.
2256 *
2257 * On failure, returns false.
2258 *
2259 * At least a shared lock on pgss->lock must be held by the caller, so as
2260 * to prevent a concurrent garbage collection. Share-lock-holding callers
2261 * should pass a gc_count pointer to obtain the number of garbage collections,
2262 * so that they can recheck the count after obtaining exclusive lock to
2263 * detect whether a garbage collection occurred (and removed this entry).
2264 */
2265static bool
2266qtext_store(const char *query, int query_len,
2267 Size *query_offset, int *gc_count)
2268{
2269 Size off;
2270 int fd;
2271
2272 /*
2273 * We use a spinlock to protect extent/n_writers/gc_count, so that
2274 * multiple processes may execute this function concurrently.
2275 */
2277 off = pgss->extent;
2278 pgss->extent += query_len + 1;
2279 pgss->n_writers++;
2280 if (gc_count)
2281 *gc_count = pgss->gc_count;
2283
2284 *query_offset = off;
2285
2286 /*
2287 * Don't allow the file to grow larger than what qtext_load_file can
2288 * (theoretically) handle. This has been seen to be reachable on 32-bit
2289 * platforms.
2290 */
2291 if (unlikely(query_len >= MaxAllocHugeSize - off))
2292 {
2293 errno = EFBIG; /* not quite right, but it'll do */
2294 fd = -1;
2295 goto error;
2296 }
2297
2298 /* Now write the data into the successfully-reserved part of the file */
2299 fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY);
2300 if (fd < 0)
2301 goto error;
2302
2303 if (pg_pwrite(fd, query, query_len, off) != query_len)
2304 goto error;
2305 if (pg_pwrite(fd, "\0", 1, off + query_len) != 1)
2306 goto error;
2307
2309
2310 /* Mark our write complete */
2312 pgss->n_writers--;
2314
2315 return true;
2316
2317error:
2318 ereport(LOG,
2320 errmsg("could not write file \"%s\": %m",
2321 PGSS_TEXT_FILE)));
2322
2323 if (fd >= 0)
2325
2326 /* Mark our write complete */
2328 pgss->n_writers--;
2330
2331 return false;
2332}
2333
2334/*
2335 * Read the external query text file into a malloc'd buffer.
2336 *
2337 * Returns NULL (without throwing an error) if unable to read, eg
2338 * file not there or insufficient memory.
2339 *
2340 * On success, the buffer size is also returned into *buffer_size.
2341 *
2342 * This can be called without any lock on pgss->lock, but in that case
2343 * the caller is responsible for verifying that the result is sane.
2344 */
2345static char *
2347{
2348 char *buf;
2349 int fd;
2350 struct stat stat;
2351 Size nread;
2352
2354 if (fd < 0)
2355 {
2356 if (errno != ENOENT)
2357 ereport(LOG,
2359 errmsg("could not read file \"%s\": %m",
2360 PGSS_TEXT_FILE)));
2361 return NULL;
2362 }
2363
2364 /* Get file length */
2365 if (fstat(fd, &stat))
2366 {
2367 ereport(LOG,
2369 errmsg("could not stat file \"%s\": %m",
2370 PGSS_TEXT_FILE)));
2372 return NULL;
2373 }
2374
2375 /* Allocate buffer; beware that off_t might be wider than size_t */
2377 buf = (char *) malloc(stat.st_size);
2378 else
2379 buf = NULL;
2380 if (buf == NULL)
2381 {
2382 ereport(LOG,
2383 (errcode(ERRCODE_OUT_OF_MEMORY),
2384 errmsg("out of memory"),
2385 errdetail("Could not allocate enough memory to read file \"%s\".",
2386 PGSS_TEXT_FILE)));
2388 return NULL;
2389 }
2390
2391 /*
2392 * OK, slurp in the file. Windows fails if we try to read more than
2393 * INT_MAX bytes at once, and other platforms might not like that either,
2394 * so read a very large file in 1GB segments.
2395 */
2396 nread = 0;
2397 while (nread < stat.st_size)
2398 {
2399 int toread = Min(1024 * 1024 * 1024, stat.st_size - nread);
2400
2401 /*
2402 * If we get a short read and errno doesn't get set, the reason is
2403 * probably that garbage collection truncated the file since we did
2404 * the fstat(), so we don't log a complaint --- but we don't return
2405 * the data, either, since it's most likely corrupt due to concurrent
2406 * writes from garbage collection.
2407 */
2408 errno = 0;
2409 if (read(fd, buf + nread, toread) != toread)
2410 {
2411 if (errno)
2412 ereport(LOG,
2414 errmsg("could not read file \"%s\": %m",
2415 PGSS_TEXT_FILE)));
2416 free(buf);
2418 return NULL;
2419 }
2420 nread += toread;
2421 }
2422
2423 if (CloseTransientFile(fd) != 0)
2424 ereport(LOG,
2426 errmsg("could not close file \"%s\": %m", PGSS_TEXT_FILE)));
2427
2428 *buffer_size = nread;
2429 return buf;
2430}
2431
/*
 * Locate a query text in the file image previously read by qtext_load_file().
 *
 * We validate the given offset/length, and return NULL if bogus.  Otherwise,
 * the result points to a null-terminated string within the buffer.
 *
 * query_offset/query_len: position and length (excluding the trailing null)
 * of the wanted text.  buffer/buffer_size: the file image; buffer may be NULL
 * if the file could not be read, in which case NULL is returned.
 */
static char *
qtext_fetch(Size query_offset, int query_len,
			char *buffer, Size buffer_size)
{
	/* File read failed? */
	if (buffer == NULL)
		return NULL;

	/*
	 * Bogus offset/length?  The text and its trailing null must lie wholly
	 * inside the buffer.  The test is arranged so that no addition is
	 * performed: "query_offset + query_len" could wrap around for a garbage
	 * offset and then wrongly appear to be in range.
	 */
	if (query_len < 0 ||
		query_offset >= buffer_size ||
		(Size) query_len >= buffer_size - query_offset)
		return NULL;
	/* As a further sanity check, make sure there's a trailing null */
	if (buffer[query_offset + query_len] != '\0')
		return NULL;
	/* Looks OK */
	return buffer + query_offset;
}
2455
2456/*
2457 * Do we need to garbage-collect the external query text file?
2458 *
2459 * Caller should hold at least a shared lock on pgss->lock.
2460 */
2461static bool
2463{
2464 Size extent;
2465
2466 /* Read shared extent pointer */
2468 extent = pgss->extent;
2470
2471 /*
2472 * Don't proceed if file does not exceed 512 bytes per possible entry.
2473 *
2474 * Here and in the next test, 32-bit machines have overflow hazards if
2475 * pgss_max and/or mean_query_len are large. Force the multiplications
2476 * and comparisons to be done in uint64 arithmetic to forestall trouble.
2477 */
2478 if ((uint64) extent < (uint64) 512 * pgss_max)
2479 return false;
2480
2481 /*
2482 * Don't proceed if file is less than about 50% bloat. Nothing can or
2483 * should be done in the event of unusually large query texts accounting
2484 * for file's large size. We go to the trouble of maintaining the mean
2485 * query length in order to prevent garbage collection from thrashing
2486 * uselessly.
2487 */
2488 if ((uint64) extent < (uint64) pgss->mean_query_len * pgss_max * 2)
2489 return false;
2490
2491 return true;
2492}
2493
2494/*
2495 * Garbage-collect orphaned query texts in external file.
2496 *
2497 * This won't be called often in the typical case, since it's likely that
2498 * there won't be too much churn, and besides, a similar compaction process
2499 * occurs when serializing to disk at shutdown or as part of resetting.
2500 * Despite this, it seems prudent to plan for the edge case where the file
2501 * becomes unreasonably large, with no other method of compaction likely to
2502 * occur in the foreseeable future.
2503 *
2504 * The caller must hold an exclusive lock on pgss->lock.
2505 *
2506 * At the first sign of trouble we unlink the query text file to get a clean
2507 * slate (although existing statistics are retained), rather than risk
2508 * thrashing by allowing the same problem case to recur indefinitely.
2509 */
2510static void
2512{
2513 char *qbuffer;
2514 Size qbuffer_size;
2515 FILE *qfile = NULL;
2516 HASH_SEQ_STATUS hash_seq;
2517 pgssEntry *entry;
2518 Size extent;
2519 int nentries;
2520
2521 /*
2522 * When called from pgss_store, some other session might have proceeded
2523 * with garbage collection in the no-lock-held interim of lock strength
2524 * escalation. Check once more that this is actually necessary.
2525 */
2526 if (!need_gc_qtexts())
2527 return;
2528
2529 /*
2530 * Load the old texts file. If we fail (out of memory, for instance),
2531 * invalidate query texts. Hopefully this is rare. It might seem better
2532 * to leave things alone on an OOM failure, but the problem is that the
2533 * file is only going to get bigger; hoping for a future non-OOM result is
2534 * risky and can easily lead to complete denial of service.
2535 */
2536 qbuffer = qtext_load_file(&qbuffer_size);
2537 if (qbuffer == NULL)
2538 goto gc_fail;
2539
2540 /*
2541 * We overwrite the query texts file in place, so as to reduce the risk of
2542 * an out-of-disk-space failure. Since the file is guaranteed not to get
2543 * larger, this should always work on traditional filesystems; though we
2544 * could still lose on copy-on-write filesystems.
2545 */
2547 if (qfile == NULL)
2548 {
2549 ereport(LOG,
2551 errmsg("could not write file \"%s\": %m",
2552 PGSS_TEXT_FILE)));
2553 goto gc_fail;
2554 }
2555
2556 extent = 0;
2557 nentries = 0;
2558
2559 hash_seq_init(&hash_seq, pgss_hash);
2560 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2561 {
2562 int query_len = entry->query_len;
2563 char *qry = qtext_fetch(entry->query_offset,
2564 query_len,
2565 qbuffer,
2566 qbuffer_size);
2567
2568 if (qry == NULL)
2569 {
2570 /* Trouble ... drop the text */
2571 entry->query_offset = 0;
2572 entry->query_len = -1;
2573 /* entry will not be counted in mean query length computation */
2574 continue;
2575 }
2576
2577 if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
2578 {
2579 ereport(LOG,
2581 errmsg("could not write file \"%s\": %m",
2582 PGSS_TEXT_FILE)));
2583 hash_seq_term(&hash_seq);
2584 goto gc_fail;
2585 }
2586
2587 entry->query_offset = extent;
2588 extent += query_len + 1;
2589 nentries++;
2590 }
2591
2592 /*
2593 * Truncate away any now-unused space. If this fails for some odd reason,
2594 * we log it, but there's no need to fail.
2595 */
2596 if (ftruncate(fileno(qfile), extent) != 0)
2597 ereport(LOG,
2599 errmsg("could not truncate file \"%s\": %m",
2600 PGSS_TEXT_FILE)));
2601
2602 if (FreeFile(qfile))
2603 {
2604 ereport(LOG,
2606 errmsg("could not write file \"%s\": %m",
2607 PGSS_TEXT_FILE)));
2608 qfile = NULL;
2609 goto gc_fail;
2610 }
2611
2612 elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
2613 pgss->extent, extent);
2614
2615 /* Reset the shared extent pointer */
2616 pgss->extent = extent;
2617
2618 /*
2619 * Also update the mean query length, to be sure that need_gc_qtexts()
2620 * won't still think we have a problem.
2621 */
2622 if (nentries > 0)
2623 pgss->mean_query_len = extent / nentries;
2624 else
2626
2627 free(qbuffer);
2628
2629 /*
2630 * OK, count a garbage collection cycle. (Note: even though we have
2631 * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
2632 * other processes may examine gc_count while holding only the mutex.
2633 * Also, we have to advance the count *after* we've rewritten the file,
2634 * else other processes might not realize they read a stale file.)
2635 */
2637
2638 return;
2639
2640gc_fail:
2641 /* clean up resources */
2642 if (qfile)
2643 FreeFile(qfile);
2644 free(qbuffer);
2645
2646 /*
2647 * Since the contents of the external file are now uncertain, mark all
2648 * hashtable entries as having invalid texts.
2649 */
2650 hash_seq_init(&hash_seq, pgss_hash);
2651 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2652 {
2653 entry->query_offset = 0;
2654 entry->query_len = -1;
2655 }
2656
2657 /*
2658 * Destroy the query text file and create a new, empty one
2659 */
2660 (void) unlink(PGSS_TEXT_FILE);
2662 if (qfile == NULL)
2663 ereport(LOG,
2665 errmsg("could not recreate file \"%s\": %m",
2666 PGSS_TEXT_FILE)));
2667 else
2668 FreeFile(qfile);
2669
2670 /* Reset the shared extent pointer */
2671 pgss->extent = 0;
2672
2673 /* Reset mean_query_len to match the new state */
2675
2676 /*
2677 * Bump the GC count even though we failed.
2678 *
2679 * This is needed to make concurrent readers of file without any lock on
2680 * pgss->lock notice existence of new version of file. Once readers
2681 * subsequently observe a change in GC count with pgss->lock held, that
2682 * forces a safe reopen of file. Writers also require that we bump here,
2683 * of course. (As required by locking protocol, readers and writers don't
2684 * trust earlier file contents until gc_count is found unchanged after
2685 * pgss->lock acquired in shared or exclusive mode respectively.)
2686 */
2688}
2689
/*
 * Reset one hashtable entry "e" (which may be NULL, in which case nothing
 * happens).  Relies on these names being in scope at the point of use:
 * minmax_only, stats_reset, num_remove and pgss_hash.
 *
 * Wrapped in do { ... } while (0) so that the macro behaves as a single
 * statement, including when used as the body of an unbraced if/else.
 */
#define SINGLE_ENTRY_RESET(e) \
do { \
	if (e) { \
		if (minmax_only) { \
			/* When requested reset only min/max statistics of an entry */ \
			for (int kind = 0; kind < PGSS_NUMKIND; kind++) \
			{ \
				(e)->counters.max_time[kind] = 0; \
				(e)->counters.min_time[kind] = 0; \
			} \
			(e)->minmax_stats_since = stats_reset; \
		} \
		else \
		{ \
			/* Remove the key otherwise */ \
			hash_search(pgss_hash, &(e)->key, HASH_REMOVE, NULL); \
			num_remove++; \
		} \
	} \
} while (0)
2708
2709/*
2710 * Reset entries corresponding to parameters passed.
2711 */
2712static TimestampTz
2713entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
2714{
2715 HASH_SEQ_STATUS hash_seq;
2716 pgssEntry *entry;
2717 FILE *qfile;
2718 int64 num_entries;
2719 int64 num_remove = 0;
2721 TimestampTz stats_reset;
2722
2723 if (!pgss || !pgss_hash)
2724 ereport(ERROR,
2725 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
2726 errmsg("pg_stat_statements must be loaded via \"shared_preload_libraries\"")));
2727
2729 num_entries = hash_get_num_entries(pgss_hash);
2730
2731 stats_reset = GetCurrentTimestamp();
2732
2733 if (userid != 0 && dbid != 0 && queryid != INT64CONST(0))
2734 {
2735 /* If all the parameters are available, use the fast path. */
2736 memset(&key, 0, sizeof(pgssHashKey));
2737 key.userid = userid;
2738 key.dbid = dbid;
2739 key.queryid = queryid;
2740
2741 /*
2742 * Reset the entry if it exists, starting with the non-top-level
2743 * entry.
2744 */
2745 key.toplevel = false;
2746 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2747
2748 SINGLE_ENTRY_RESET(entry);
2749
2750 /* Also reset the top-level entry if it exists. */
2751 key.toplevel = true;
2752 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
2753
2754 SINGLE_ENTRY_RESET(entry);
2755 }
2756 else if (userid != 0 || dbid != 0 || queryid != INT64CONST(0))
2757 {
2758 /* Reset entries corresponding to valid parameters. */
2759 hash_seq_init(&hash_seq, pgss_hash);
2760 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2761 {
2762 if ((!userid || entry->key.userid == userid) &&
2763 (!dbid || entry->key.dbid == dbid) &&
2764 (!queryid || entry->key.queryid == queryid))
2765 {
2766 SINGLE_ENTRY_RESET(entry);
2767 }
2768 }
2769 }
2770 else
2771 {
2772 /* Reset all entries. */
2773 hash_seq_init(&hash_seq, pgss_hash);
2774 while ((entry = hash_seq_search(&hash_seq)) != NULL)
2775 {
2776 SINGLE_ENTRY_RESET(entry);
2777 }
2778 }
2779
2780 /* All entries are removed? */
2781 if (num_entries != num_remove)
2782 goto release_lock;
2783
2784 /*
2785 * Reset global statistics for pg_stat_statements since all entries are
2786 * removed.
2787 */
2789 pgss->stats.dealloc = 0;
2790 pgss->stats.stats_reset = stats_reset;
2792
2793 /*
2794 * Write new empty query file, perhaps even creating a new one to recover
2795 * if the file was missing.
2796 */
2798 if (qfile == NULL)
2799 {
2800 ereport(LOG,
2802 errmsg("could not create file \"%s\": %m",
2803 PGSS_TEXT_FILE)));
2804 goto done;
2805 }
2806
2807 /* If ftruncate fails, log it, but it's not a fatal problem */
2808 if (ftruncate(fileno(qfile), 0) != 0)
2809 ereport(LOG,
2811 errmsg("could not truncate file \"%s\": %m",
2812 PGSS_TEXT_FILE)));
2813
2814 FreeFile(qfile);
2815
2816done:
2817 pgss->extent = 0;
2818 /* This counts as a query text garbage collection for our purposes */
2820
2821release_lock:
2823
2824 return stats_reset;
2825}
2826
2827/*
2828 * Generate a normalized version of the query string that will be used to
2829 * represent all similar queries.
2830 *
2831 * Note that the normalized representation may well vary depending on
2832 * just which "equivalent" query is used to create the hashtable entry.
2833 * We assume this is OK.
2834 *
2835 * If query_loc > 0, then "query" has been advanced by that much compared to
2836 * the original string start, so we need to translate the provided locations
2837 * to compensate. (This lets us avoid re-scanning statements before the one
2838 * of interest, so it's worth doing.)
2839 *
2840 * *query_len_p contains the input string length, and is updated with
2841 * the result string length on exit. The resulting string might be longer
2842 * or shorter depending on what happens with replacement of constants.
2843 *
2844 * Returns a palloc'd string.
2845 */
2846static char *
2847generate_normalized_query(JumbleState *jstate, const char *query,
2848 int query_loc, int *query_len_p)
2849{
2850 char *norm_query;
2851 int query_len = *query_len_p;
2852 int norm_query_buflen, /* Space allowed for norm_query */
2853 len_to_wrt, /* Length (in bytes) to write */
2854 quer_loc = 0, /* Source query byte location */
2855 n_quer_loc = 0, /* Normalized query byte location */
2856 last_off = 0, /* Offset from start for previous tok */
2857 last_tok_len = 0; /* Length (in bytes) of that tok */
2858 int num_constants_replaced = 0;
2859
2860 /*
2861 * Get constants' lengths (core system only gives us locations). Note
2862 * this also ensures the items are sorted by location.
2863 */
2864 fill_in_constant_lengths(jstate, query, query_loc);
2865
2866 /*
2867 * Allow for $n symbols to be longer than the constants they replace.
2868 * Constants must take at least one byte in text form, while a $n symbol
2869 * certainly isn't more than 11 bytes, even if n reaches INT_MAX. We
2870 * could refine that limit based on the max value of n for the current
2871 * query, but it hardly seems worth any extra effort to do so.
2872 */
2873 norm_query_buflen = query_len + jstate->clocations_count * 10;
2874
2875 /* Allocate result buffer */
2876 norm_query = palloc(norm_query_buflen + 1);
2877
2878 for (int i = 0; i < jstate->clocations_count; i++)
2879 {
2880 int off, /* Offset from start for cur tok */
2881 tok_len; /* Length (in bytes) of that tok */
2882
2883 /*
2884 * If we have an external param at this location, but no lists are
2885 * being squashed across the query, then we skip here; this will make
2886 * us print the characters found in the original query that represent
2887 * the parameter in the next iteration (or after the loop is done),
2888 * which is a bit odd but seems to work okay in most cases.
2889 */
2890 if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists)
2891 continue;
2892
2893 off = jstate->clocations[i].location;
2894
2895 /* Adjust recorded location if we're dealing with partial string */
2896 off -= query_loc;
2897
2898 tok_len = jstate->clocations[i].length;
2899
2900 if (tok_len < 0)
2901 continue; /* ignore any duplicates */
2902
2903 /* Copy next chunk (what precedes the next constant) */
2904 len_to_wrt = off - last_off;
2905 len_to_wrt -= last_tok_len;
2906 Assert(len_to_wrt >= 0);
2907 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2908 n_quer_loc += len_to_wrt;
2909
2910 /*
2911 * And insert a param symbol in place of the constant token; and, if
2912 * we have a squashable list, insert a placeholder comment starting
2913 * from the list's second value.
2914 */
2915 n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s",
2916 num_constants_replaced + 1 + jstate->highest_extern_param_id,
2917 jstate->clocations[i].squashed ? " /*, ... */" : "");
2918 num_constants_replaced++;
2919
2920 /* move forward */
2921 quer_loc = off + tok_len;
2922 last_off = off;
2923 last_tok_len = tok_len;
2924 }
2925
2926 /*
2927 * We've copied up until the last ignorable constant. Copy over the
2928 * remaining bytes of the original query string.
2929 */
2930 len_to_wrt = query_len - quer_loc;
2931
2932 Assert(len_to_wrt >= 0);
2933 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
2934 n_quer_loc += len_to_wrt;
2935
2936 Assert(n_quer_loc <= norm_query_buflen);
2937 norm_query[n_quer_loc] = '\0';
2938
2939 *query_len_p = n_quer_loc;
2940 return norm_query;
2941}
2942
2943/*
2944 * Given a valid SQL string and an array of constant-location records,
2945 * fill in the textual lengths of those constants.
2946 *
2947 * The constants may use any allowed constant syntax, such as float literals,
2948 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2949 * accomplished by using the public API for the core scanner.
2950 *
2951 * It is the caller's job to ensure that the string is a valid SQL statement
2952 * with constants at the indicated locations. Since in practice the string
2953 * has already been parsed, and the locations that the caller provides will
2954 * have originated from within the authoritative parser, this should not be
2955 * a problem.
2956 *
2957 * Multiple constants can have the same location. We reset lengths of those
2958 * past the first to -1 so that they can later be ignored.
2959 *
2960 * If query_loc > 0, then "query" has been advanced by that much compared to
2961 * the original string start, so we need to translate the provided locations
2962 * to compensate. (This lets us avoid re-scanning statements before the one
2963 * of interest, so it's worth doing.)
2964 *
2965 * N.B. There is an assumption that a '-' character at a Const location begins
2966 * a negative numeric constant. This precludes there ever being another
2967 * reason for a constant to start with a '-'.
2968 */
2969static void
2970fill_in_constant_lengths(JumbleState *jstate, const char *query,
2971 int query_loc)
2972{
2973 LocationLen *locs;
2974 core_yyscan_t yyscanner;
2976 core_YYSTYPE yylval;
2978
2979 /*
2980 * Sort the records by location so that we can process them in order while
2981 * scanning the query text.
2982 */
2983 if (jstate->clocations_count > 1)
2984 qsort(jstate->clocations, jstate->clocations_count,
2985 sizeof(LocationLen), comp_location);
2986 locs = jstate->clocations;
2987
2988 /* initialize the flex scanner --- should match raw_parser() */
2989 yyscanner = scanner_init(query,
2990 &yyextra,
2991 &ScanKeywords,
2993
2994 /* we don't want to re-emit any escape string warnings */
2995 yyextra.escape_string_warning = false;
2996
2997 /* Search for each constant, in sequence */
2998 for (int i = 0; i < jstate->clocations_count; i++)
2999 {
3000 int loc;
3001 int tok;
3002
3003 /* Ignore constants after the first one in the same location */
3004 if (i > 0 && locs[i].location == locs[i - 1].location)
3005 {
3006 locs[i].length = -1;
3007 continue;
3008 }
3009
3010 if (locs[i].squashed)
3011 continue; /* squashable list, ignore */
3012
3013 /* Adjust recorded location if we're dealing with partial string */
3014 loc = locs[i].location - query_loc;
3015 Assert(loc >= 0);
3016
3017 /*
3018 * We have a valid location for a constant that's not a dupe. Lex
3019 * tokens until we find the desired constant.
3020 */
3021 for (;;)
3022 {
3023 tok = core_yylex(&yylval, &yylloc, yyscanner);
3024
3025 /* We should not hit end-of-string, but if we do, behave sanely */
3026 if (tok == 0)
3027 break; /* out of inner for-loop */
3028
3029 /*
3030 * We should find the token position exactly, but if we somehow
3031 * run past it, work with that.
3032 */
3033 if (yylloc >= loc)
3034 {
3035 if (query[loc] == '-')
3036 {
3037 /*
3038 * It's a negative value - this is the one and only case
3039 * where we replace more than a single token.
3040 *
3041 * Do not compensate for the core system's special-case
3042 * adjustment of location to that of the leading '-'
3043 * operator in the event of a negative constant. It is
3044 * also useful for our purposes to start from the minus
3045 * symbol. In this way, queries like "select * from foo
3046 * where bar = 1" and "select * from foo where bar = -2"
3047 * will have identical normalized query strings.
3048 */
3049 tok = core_yylex(&yylval, &yylloc, yyscanner);
3050 if (tok == 0)
3051 break; /* out of inner for-loop */
3052 }
3053
3054 /*
3055 * We now rely on the assumption that flex has placed a zero
3056 * byte after the text of the current token in scanbuf.
3057 */
3058 locs[i].length = strlen(yyextra.scanbuf + loc);
3059 break; /* out of inner for-loop */
3060 }
3061 }
3062
3063 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
3064 if (tok == 0)
3065 break;
3066 }
3067
3068 scanner_finish(yyscanner);
3069}
3070
3071/*
3072 * comp_location: comparator for qsorting LocationLen structs by location
3073 */
3074static int
3075comp_location(const void *a, const void *b)
3076{
3077 int l = ((const LocationLen *) a)->location;
3078 int r = ((const LocationLen *) b)->location;
3079
3080 return pg_cmp_s32(l, r);
3081}
bool has_privs_of_role(Oid member, Oid role)
Definition: acl.c:5284
void(* post_parse_analyze_hook_type)(ParseState *pstate, Query *query, JumbleState *jstate)
Definition: analyze.h:22
Datum numeric_in(PG_FUNCTION_ARGS)
Definition: numeric.c:626
TimestampTz GetCurrentTimestamp(void)
Definition: timestamp.c:1645
static Datum values[MAXATTR]
Definition: bootstrap.c:153
#define CStringGetTextDatum(s)
Definition: builtins.h:97
#define INT64CONST(x)
Definition: c.h:557
#define Min(x, y)
Definition: c.h:1008
#define PG_BINARY_R
Definition: c.h:1265
#define MAXALIGN(LEN)
Definition: c.h:815
#define Max(x, y)
Definition: c.h:1002
int64_t int64
Definition: c.h:540
#define PG_BINARY
Definition: c.h:1263
#define UINT64_FORMAT
Definition: c.h:562
int32_t int32
Definition: c.h:539
uint64_t uint64
Definition: c.h:544
#define unlikely(x)
Definition: c.h:407
uint32_t uint32
Definition: c.h:543
#define PG_BINARY_W
Definition: c.h:1266
size_t Size
Definition: c.h:615
enc
int64 TimestampTz
Definition: timestamp.h:39
void * hash_search(HTAB *hashp, const void *keyPtr, HASHACTION action, bool *foundPtr)
Definition: dynahash.c:952
Size hash_estimate_size(int64 num_entries, Size entrysize)
Definition: dynahash.c:783
void * hash_seq_search(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1415
void hash_seq_term(HASH_SEQ_STATUS *status)
Definition: dynahash.c:1509
int64 hash_get_num_entries(HTAB *hashp)
Definition: dynahash.c:1336
void hash_seq_init(HASH_SEQ_STATUS *status, HTAB *hashp)
Definition: dynahash.c:1380
int errcode_for_file_access(void)
Definition: elog.c:886
int errdetail(const char *fmt,...)
Definition: elog.c:1216
int errcode(int sqlerrcode)
Definition: elog.c:863
int errmsg(const char *fmt,...)
Definition: elog.c:1080
#define LOG
Definition: elog.h:31
#define PG_TRY(...)
Definition: elog.h:372
#define PG_END_TRY(...)
Definition: elog.h:397
#define DEBUG1
Definition: elog.h:30
#define ERROR
Definition: elog.h:39
#define elog(elevel,...)
Definition: elog.h:226
#define PG_FINALLY(...)
Definition: elog.h:389
#define ereport(elevel,...)
Definition: elog.h:150
ExecutorEnd_hook_type ExecutorEnd_hook
Definition: execMain.c:71
ExecutorFinish_hook_type ExecutorFinish_hook
Definition: execMain.c:70
ExecutorStart_hook_type ExecutorStart_hook
Definition: execMain.c:68
void standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
Definition: execMain.c:141
ExecutorRun_hook_type ExecutorRun_hook
Definition: execMain.c:69
void standard_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: execMain.c:307
void standard_ExecutorEnd(QueryDesc *queryDesc)
Definition: execMain.c:475
void standard_ExecutorFinish(QueryDesc *queryDesc)
Definition: execMain.c:415
void(* ExecutorFinish_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:86
void(* ExecutorRun_hook_type)(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
Definition: executor.h:80
void(* ExecutorStart_hook_type)(QueryDesc *queryDesc, int eflags)
Definition: executor.h:76
void(* ExecutorEnd_hook_type)(QueryDesc *queryDesc)
Definition: executor.h:90
int durable_rename(const char *oldfile, const char *newfile, int elevel)
Definition: fd.c:779
int CloseTransientFile(int fd)
Definition: fd.c:2868
int FreeFile(FILE *file)
Definition: fd.c:2840
FILE * AllocateFile(const char *name, const char *mode)
Definition: fd.c:2641
int OpenTransientFile(const char *fileName, int fileFlags)
Definition: fd.c:2691
#define PG_RETURN_VOID()
Definition: fmgr.h:349
#define PG_GETARG_OID(n)
Definition: fmgr.h:275
#define PG_GETARG_INT64(n)
Definition: fmgr.h:283
#define PG_GETARG_BOOL(n)
Definition: fmgr.h:274
#define PG_RETURN_DATUM(x)
Definition: fmgr.h:353
#define DirectFunctionCall3(func, arg1, arg2, arg3)
Definition: fmgr.h:686
#define PG_FUNCTION_ARGS
Definition: fmgr.h:193
void InitMaterializedSRF(FunctionCallInfo fcinfo, bits32 flags)
Definition: funcapi.c:76
TypeFuncClass get_call_result_type(FunctionCallInfo fcinfo, Oid *resultTypeId, TupleDesc *resultTupleDesc)
Definition: funcapi.c:276
@ TYPEFUNC_COMPOSITE
Definition: funcapi.h:149
static Datum HeapTupleGetDatum(const HeapTupleData *tuple)
Definition: funcapi.h:230
bool IsUnderPostmaster
Definition: globals.c:120
Oid MyDatabaseId
Definition: globals.c:94
void DefineCustomEnumVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, const struct config_enum_entry *options, GucContext context, int flags, GucEnumCheckHook check_hook, GucEnumAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5114
void DefineCustomBoolVariable(const char *name, const char *short_desc, const char *long_desc, bool *valueAddr, bool bootValue, GucContext context, int flags, GucBoolCheckHook check_hook, GucBoolAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5011
void MarkGUCPrefixReserved(const char *className)
Definition: guc.c:5148
void DefineCustomIntVariable(const char *name, const char *short_desc, const char *long_desc, int *valueAddr, int bootValue, int minValue, int maxValue, GucContext context, int flags, GucIntCheckHook check_hook, GucIntAssignHook assign_hook, GucShowHook show_hook)
Definition: guc.c:5035
@ PGC_SUSET
Definition: guc.h:78
@ PGC_POSTMASTER
Definition: guc.h:74
@ PGC_SIGHUP
Definition: guc.h:75
Assert(PointerIsAligned(start, uint64))
return str start
#define free(a)
Definition: header.h:65
#define malloc(a)
Definition: header.h:50
HeapTuple heap_form_tuple(TupleDesc tupleDescriptor, const Datum *values, const bool *isnull)
Definition: heaptuple.c:1117
@ HASH_FIND
Definition: hsearch.h:113
@ HASH_REMOVE
Definition: hsearch.h:115
@ HASH_ENTER
Definition: hsearch.h:114
#define HASH_ELEM
Definition: hsearch.h:95
#define HASH_BLOBS
Definition: hsearch.h:97
#define INSTR_TIME_SET_CURRENT(t)
Definition: instr_time.h:122
#define INSTR_TIME_SUBTRACT(x, y)
Definition: instr_time.h:181
#define INSTR_TIME_GET_MILLISEC(t)
Definition: instr_time.h:191
void InstrEndLoop(Instrumentation *instr)
Definition: instrument.c:140
Instrumentation * InstrAlloc(int n, int instrument_options, bool async_mode)
Definition: instrument.c:31
WalUsage pgWalUsage
Definition: instrument.c:22
void WalUsageAccumDiff(WalUsage *dst, const WalUsage *add, const WalUsage *sub)
Definition: instrument.c:288
BufferUsage pgBufferUsage
Definition: instrument.c:20
void BufferUsageAccumDiff(BufferUsage *dst, const BufferUsage *add, const BufferUsage *sub)
Definition: instrument.c:248
@ INSTRUMENT_ALL
Definition: instrument.h:67
static int pg_cmp_s32(int32 a, int32 b)
Definition: int.h:646
#define read(a, b, c)
Definition: win32.h:13
void on_shmem_exit(pg_on_exit_callback function, Datum arg)
Definition: ipc.c:365
void(* shmem_startup_hook_type)(void)
Definition: ipc.h:22
shmem_startup_hook_type shmem_startup_hook
Definition: ipci.c:59
void RequestAddinShmemSpace(Size size)
Definition: ipci.c:75
int b
Definition: isn.c:74
int a
Definition: isn.c:73
int i
Definition: isn.c:77
PGDLLIMPORT const ScanKeywordList ScanKeywords
bool LWLockAcquire(LWLock *lock, LWLockMode mode)
Definition: lwlock.c:1174
LWLockPadded * GetNamedLWLockTranche(const char *tranche_name)
Definition: lwlock.c:566
void RequestNamedLWLockTranche(const char *tranche_name, int num_lwlocks)
Definition: lwlock.c:649
void LWLockRelease(LWLock *lock)
Definition: lwlock.c:1894
@ LW_SHARED
Definition: lwlock.h:113
@ LW_EXCLUSIVE
Definition: lwlock.h:112
int GetDatabaseEncoding(void)
Definition: mbutils.c:1262
char * pg_any_to_server(const char *s, int len, int encoding)
Definition: mbutils.c:677
void * repalloc(void *pointer, Size size)
Definition: mcxt.c:1610
void pfree(void *pointer)
Definition: mcxt.c:1594
void * palloc(Size size)
Definition: mcxt.c:1365
#define MaxAllocHugeSize
Definition: memutils.h:45
void(* shmem_request_hook_type)(void)
Definition: miscadmin.h:533
Oid GetUserId(void)
Definition: miscinit.c:469
shmem_request_hook_type shmem_request_hook
Definition: miscinit.c:1789
bool process_shared_preload_libraries_in_progress
Definition: miscinit.c:1786
#define IsA(nodeptr, _type_)
Definition: nodes.h:164
static MemoryContext MemoryContextSwitchTo(MemoryContext context)
Definition: palloc.h:124
post_parse_analyze_hook_type post_parse_analyze_hook
Definition: analyze.c:67
void * arg
const void size_t len
int32 encoding
Definition: pg_database.h:41
static int entry_cmp(const void *lhs, const void *rhs)
#define PG_STAT_STATEMENTS_COLS_V1_0
static planner_hook_type prev_planner_hook
@ PGSS_V1_9
@ PGSS_V1_10
@ PGSS_V1_12
@ PGSS_V1_1
@ PGSS_V1_11
@ PGSS_V1_3
@ PGSS_V1_2
@ PGSS_V1_8
@ PGSS_V1_0
@ PGSS_V1_13
#define SINGLE_ENTRY_RESET(e)
static int pgss_track
static bool pgss_track_planning
#define ASSUMED_MEDIAN_INIT
#define PG_STAT_STATEMENTS_INFO_COLS
PG_FUNCTION_INFO_V1(pg_stat_statements_reset)
static ExecutorRun_hook_type prev_ExecutorRun
struct pgssSharedState pgssSharedState
static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, bool showtext)
#define record_gc_qtexts()
Datum pg_stat_statements_reset_1_7(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS_V1_13
void _PG_init(void)
static void gc_qtexts(void)
#define PG_STAT_STATEMENTS_COLS_V1_8
static int comp_location(const void *a, const void *b)
Datum pg_stat_statements_1_11(PG_FUNCTION_ARGS)
#define PG_STAT_STATEMENTS_COLS
struct Counters Counters
PG_MODULE_MAGIC_EXT(.name="pg_stat_statements",.version=PG_VERSION)
Datum pg_stat_statements_1_9(PG_FUNCTION_ARGS)
#define PGSS_TEXT_FILE
PGSSTrackLevel
@ PGSS_TRACK_ALL
@ PGSS_TRACK_NONE
@ PGSS_TRACK_TOP
static PlannedStmt * pgss_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
static char * qtext_fetch(Size query_offset, int query_len, char *buffer, Size buffer_size)
static int pgss_max
#define USAGE_DEALLOC_PERCENT
static bool qtext_store(const char *query, int query_len, Size *query_offset, int *gc_count)
Datum pg_stat_statements_1_10(PG_FUNCTION_ARGS)
#define USAGE_EXEC(duration)
#define PG_STAT_STATEMENTS_COLS_V1_11
#define STICKY_DECREASE_FACTOR
#define IS_STICKY(c)
static const struct config_enum_entry track_options[]
#define PG_STAT_STATEMENTS_COLS_V1_2
#define PG_STAT_STATEMENTS_COLS_V1_12
Datum pg_stat_statements_reset(PG_FUNCTION_ARGS)
static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
#define PGSS_DUMP_FILE
Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS)
static char * qtext_load_file(Size *buffer_size)
static post_parse_analyze_hook_type prev_post_parse_analyze_hook
static bool need_gc_qtexts(void)
#define pgss_enabled(level)
static shmem_startup_hook_type prev_shmem_startup_hook
static shmem_request_hook_type prev_shmem_request_hook
static void pgss_shmem_request(void)
pgssStoreKind
@ PGSS_PLAN
@ PGSS_EXEC
@ PGSS_INVALID
#define ASSUMED_LENGTH_INIT
#define PG_STAT_STATEMENTS_COLS_V1_3
static Size pgss_memsize(void)
#define PGSS_NUMKIND
static bool pgss_save
static void pgss_shmem_startup(void)
static int nesting_level
struct pgssGlobalStats pgssGlobalStats
static const uint32 PGSS_PG_MAJOR_VERSION
Datum pg_stat_statements_1_2(PG_FUNCTION_ARGS)
struct pgssEntry pgssEntry
#define USAGE_DECREASE_FACTOR
static ExecutorStart_hook_type prev_ExecutorStart
static void pgss_store(const char *query, int64 queryId, int query_location, int query_len, pgssStoreKind kind, double total_time, uint64 rows, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin)
Datum pg_stat_statements(PG_FUNCTION_ARGS)
Datum pg_stat_statements_info(PG_FUNCTION_ARGS)
static void entry_dealloc(void)
#define PG_STAT_STATEMENTS_COLS_V1_10
static pgssSharedState * pgss
Datum pg_stat_statements_1_3(PG_FUNCTION_ARGS)
static void pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
static void pgss_ExecutorFinish(QueryDesc *queryDesc)
static ProcessUtility_hook_type prev_ProcessUtility
#define PG_STAT_STATEMENTS_COLS_V1_1
Datum pg_stat_statements_1_8(PG_FUNCTION_ARGS)
static void pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
struct pgssHashKey pgssHashKey
Datum pg_stat_statements_reset_1_11(PG_FUNCTION_ARGS)
static pgssEntry * entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky)
static void fill_in_constant_lengths(JumbleState *jstate, const char *query, int query_loc)
static bool pgss_track_utility
#define USAGE_INIT
Datum pg_stat_statements_1_12(PG_FUNCTION_ARGS)
static ExecutorEnd_hook_type prev_ExecutorEnd
#define PG_STAT_STATEMENTS_COLS_V1_9
static void pgss_ExecutorEnd(QueryDesc *queryDesc)
static char * generate_normalized_query(JumbleState *jstate, const char *query, int query_loc, int *query_len_p)
static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
static HTAB * pgss_hash
static const uint32 PGSS_FILE_HEADER
static void pgss_shmem_shutdown(int code, Datum arg)
static ExecutorFinish_hook_type prev_ExecutorFinish
static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only)
static char * buf
Definition: pg_test_fsync.c:72
#define PG_VALID_BE_ENCODING(_enc)
Definition: pg_wchar.h:281
static int duration
Definition: pgbench.c:175
planner_hook_type planner_hook
Definition: planner.c:74
PlannedStmt * standard_planner(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition: planner.c:333
PlannedStmt *(* planner_hook_type)(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams, ExplainState *es)
Definition: planner.h:28
PlannedStmtOrigin
Definition: plannodes.h:38
@ PLAN_STMT_UNKNOWN
Definition: plannodes.h:39
@ PLAN_STMT_CACHE_CUSTOM
Definition: plannodes.h:43
@ PLAN_STMT_CACHE_GENERIC
Definition: plannodes.h:42
#define sprintf
Definition: port.h:241
#define pg_pwrite
Definition: port.h:227
#define snprintf
Definition: port.h:239
#define qsort(a, b, c, d)
Definition: port.h:479
static Datum Int64GetDatum(int64 X)
Definition: postgres.h:403
#define Int64GetDatumFast(X)
Definition: postgres.h:515
#define Float8GetDatumFast(X)
Definition: postgres.h:517
static Datum BoolGetDatum(bool X)
Definition: postgres.h:112
static Datum ObjectIdGetDatum(Oid X)
Definition: postgres.h:262
uint64_t Datum
Definition: postgres.h:70
static Datum CStringGetDatum(const char *X)
Definition: postgres.h:360
static Datum Int32GetDatum(int32 X)
Definition: postgres.h:222
unsigned int Oid
Definition: postgres_ext.h:32
static int fd(const char *x, int i)
Definition: preproc-init.c:105
const char * YYLTYPE
const char * CleanQuerytext(const char *query, int *location, int *len)
void EnableQueryId(void)
static struct subre * parse(struct vars *v, int stopper, int type, struct state *init, struct state *final)
Definition: regcomp.c:717
core_yyscan_t scanner_init(const char *str, core_yy_extra_type *yyext, const ScanKeywordList *keywordlist, const uint16 *keyword_tokens)
Definition: scan.l:1249
#define yylloc
Definition: scan.l:1122
void scanner_finish(core_yyscan_t yyscanner)
Definition: scan.l:1291
#define yyextra
Definition: scan.l:1118
const uint16 ScanKeywordTokens[]
Definition: scan.l:81
void * core_yyscan_t
Definition: scanner.h:121
int core_yylex(core_YYSTYPE *yylval_param, YYLTYPE *yylloc_param, core_yyscan_t yyscanner)
ScanDirection
Definition: sdir.h:25
Size add_size(Size s1, Size s2)
Definition: shmem.c:494
HTAB * ShmemInitHash(const char *name, int64 init_size, int64 max_size, HASHCTL *infoP, int hash_flags)
Definition: shmem.c:333
void * ShmemInitStruct(const char *name, Size size, bool *foundPtr)
Definition: shmem.c:388
#define SpinLockInit(lock)
Definition: spin.h:57
#define SpinLockRelease(lock)
Definition: spin.h:61
#define SpinLockAcquire(lock)
Definition: spin.h:59
static void error(void)
Definition: sql-dyntest.c:147
instr_time local_blk_read_time
Definition: instrument.h:38
int64 shared_blks_dirtied
Definition: instrument.h:28
int64 local_blks_hit
Definition: instrument.h:30
instr_time temp_blk_write_time
Definition: instrument.h:41
instr_time shared_blk_read_time
Definition: instrument.h:36
instr_time shared_blk_write_time
Definition: instrument.h:37
int64 local_blks_written
Definition: instrument.h:33
instr_time temp_blk_read_time
Definition: instrument.h:40
instr_time local_blk_write_time
Definition: instrument.h:39
int64 temp_blks_read
Definition: instrument.h:34
int64 shared_blks_read
Definition: instrument.h:27
int64 shared_blks_written
Definition: instrument.h:29
int64 temp_blks_written
Definition: instrument.h:35
int64 local_blks_read
Definition: instrument.h:31
int64 local_blks_dirtied
Definition: instrument.h:32
int64 shared_blks_hit
Definition: instrument.h:26
int64 temp_blks_written
int64 calls[PGSS_NUMKIND]
int64 wal_buffers_full
int64 parallel_workers_launched
int64 shared_blks_written
double jit_generation_time
int64 temp_blks_read
double min_time[PGSS_NUMKIND]
int64 local_blks_written
int64 generic_plan_calls
double sum_var_time[PGSS_NUMKIND]
double temp_blk_read_time
double local_blk_write_time
int64 jit_emission_count
int64 jit_deform_count
double jit_emission_time
int64 shared_blks_hit
double local_blk_read_time
double jit_optimization_time
double shared_blk_write_time
int64 jit_optimization_count
double total_time[PGSS_NUMKIND]
double max_time[PGSS_NUMKIND]
int64 shared_blks_dirtied
double mean_time[PGSS_NUMKIND]
double temp_blk_write_time
int64 custom_plan_calls
int64 local_blks_dirtied
int64 jit_inlining_count
int64 shared_blks_read
int64 local_blks_hit
double jit_deform_time
int64 parallel_workers_to_launch
int64 local_blks_read
double shared_blk_read_time
double jit_inlining_time
int es_parallel_workers_to_launch
Definition: execnodes.h:746
struct JitContext * es_jit
Definition: execnodes.h:764
uint64 es_total_processed
Definition: execnodes.h:716
MemoryContext es_query_cxt
Definition: execnodes.h:710
int es_parallel_workers_launched
Definition: execnodes.h:748
Size keysize
Definition: hsearch.h:75
Size entrysize
Definition: hsearch.h:76
Definition: dynahash.c:222
WalUsage walusage
Definition: instrument.h:94
BufferUsage bufusage
Definition: instrument.h:93
JitInstrumentation instr
Definition: jit.h:62
instr_time generation_counter
Definition: jit.h:33
size_t created_functions
Definition: jit.h:30
instr_time optimization_counter
Definition: jit.h:42
instr_time deform_counter
Definition: jit.h:36
instr_time emission_counter
Definition: jit.h:45
instr_time inlining_counter
Definition: jit.h:39
bool has_squashed_lists
Definition: queryjumble.h:65
int highest_extern_param_id
Definition: queryjumble.h:62
LocationLen * clocations
Definition: queryjumble.h:47
int clocations_count
Definition: queryjumble.h:53
Definition: lwlock.h:42
bool squashed
Definition: queryjumble.h:28
bool extern_param
Definition: queryjumble.h:31
Definition: nodes.h:135
const char * p_sourcetext
Definition: parse_node.h:195
int64 queryId
Definition: plannodes.h:71
ParseLoc stmt_len
Definition: plannodes.h:165
PlannedStmtOrigin planOrigin
Definition: plannodes.h:77
ParseLoc stmt_location
Definition: plannodes.h:163
Node * utilityStmt
Definition: plannodes.h:150
uint64 nprocessed
Definition: cmdtag.h:32
CommandTag commandTag
Definition: cmdtag.h:31
const char * sourceText
Definition: execdesc.h:38
EState * estate
Definition: execdesc.h:48
PlannedStmt * plannedstmt
Definition: execdesc.h:37
struct Instrumentation * totaltime
Definition: execdesc.h:55
Node * utilityStmt
Definition: parsenodes.h:141
ParseLoc stmt_location
Definition: parsenodes.h:255
TupleDesc setDesc
Definition: execnodes.h:364
Tuplestorestate * setResult
Definition: execnodes.h:363
int64 wal_buffers_full
Definition: instrument.h:57
uint64 wal_bytes
Definition: instrument.h:55
int64 wal_fpi
Definition: instrument.h:54
int64 wal_records
Definition: instrument.h:53
Definition: guc.h:174
Counters counters
pgssHashKey key
TimestampTz minmax_stats_since
TimestampTz stats_since
TimestampTz stats_reset
pgssGlobalStats stats
__int64 st_size
Definition: win32_port.h:263
void tuplestore_putvalues(Tuplestorestate *state, TupleDesc tdesc, const Datum *values, const bool *isnull)
Definition: tuplestore.c:784
void standard_ProcessUtility(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.c:545
ProcessUtility_hook_type ProcessUtility_hook
Definition: utility.c:71
void(* ProcessUtility_hook_type)(PlannedStmt *pstmt, const char *queryString, bool readOnlyTree, ProcessUtilityContext context, ParamListInfo params, QueryEnvironment *queryEnv, DestReceiver *dest, QueryCompletion *qc)
Definition: utility.h:71
ProcessUtilityContext
Definition: utility.h:21
static Datum TimestampTzGetDatum(TimestampTz X)
Definition: timestamp.h:52
#define PG_RETURN_TIMESTAMPTZ(x)
Definition: timestamp.h:68
const char * name
#define fstat
Definition: win32_port.h:273