/*-------------------------------------------------------------------------
 *
 * pg_buffercache_pages.c
 *    display some contents of the buffer cache
 *
 *    contrib/pg_buffercache/pg_buffercache_pages.c
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/relation.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "port/pg_numa.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"


#define NUM_BUFFERCACHE_PAGES_MIN_ELEM  8
#define NUM_BUFFERCACHE_PAGES_ELEM  9
#define NUM_BUFFERCACHE_SUMMARY_ELEM  5
#define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM  4
#define NUM_BUFFERCACHE_EVICT_ELEM  2
#define NUM_BUFFERCACHE_EVICT_RELATION_ELEM  3
#define NUM_BUFFERCACHE_EVICT_ALL_ELEM  3

#define NUM_BUFFERCACHE_OS_PAGES_ELEM  3
PG_MODULE_MAGIC_EXT(
					.name = "pg_buffercache",
					.version = PG_VERSION
);

/*
 * Record structure holding the cache data to be exposed.
 */
typedef struct
{
	uint32		bufferid;
	RelFileNumber relfilenumber;
	Oid			reltablespace;
	Oid			reldatabase;
	ForkNumber	forknum;
	BlockNumber blocknum;
	bool		isvalid;
	bool		isdirty;
	uint16		usagecount;

	/*
	 * An int32 is sufficiently large, as MAX_BACKENDS prevents a buffer from
	 * being pinned by too many backends and each backend will only pin once
	 * because of bufmgr.c's PrivateRefCount infrastructure.
	 */
	int32		pinning_backends;
} BufferCachePagesRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
	TupleDesc	tupdesc;
	BufferCachePagesRec *record;
} BufferCachePagesContext;

/*
 * Record structure holding the cache data to be exposed for OS pages. This
 * structure is used by pg_buffercache_os_pages(), where NUMA information may
 * or may not be included.
 */
typedef struct
{
	uint32		bufferid;
	int64		page_num;
	int32		numa_node;
} BufferCacheOsPagesRec;

/*
 * Function context for data persisting over repeated calls.
 */
typedef struct
{
	TupleDesc	tupdesc;
	bool		include_numa;
	BufferCacheOsPagesRec *record;
} BufferCacheOsPagesContext;


/*
 * Function returning data from the shared buffer cache - buffer number,
 * relation node/tablespace/database/blocknum and dirty indicator.
 */
PG_FUNCTION_INFO_V1(pg_buffercache_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_os_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_numa_pages);
PG_FUNCTION_INFO_V1(pg_buffercache_summary);
PG_FUNCTION_INFO_V1(pg_buffercache_usage_counts);
PG_FUNCTION_INFO_V1(pg_buffercache_evict);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_relation);
PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);

/* Only need to touch memory once per backend process lifetime */
static bool firstNumaTouch = true;


Datum
pg_buffercache_pages(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	Datum		result;
	MemoryContext oldcontext;
	BufferCachePagesContext *fctx;	/* User function context. */
	TupleDesc	tupledesc;
	TupleDesc	expected_tupledesc;
	HeapTuple	tuple;

	if (SRF_IS_FIRSTCALL())
	{
		int			i;

		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch context when allocating stuff to be used in later calls */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* Create a user function context for cross-call persistence */
		fctx = (BufferCachePagesContext *) palloc(sizeof(BufferCachePagesContext));

		/*
		 * To smoothly support upgrades from version 1.0 of this extension,
		 * transparently handle the (non-)existence of the pinning_backends
		 * column.  Unfortunately we have to look up the call's result type
		 * for that: we can't simply trust the result type implied by the
		 * function definition, since a caller using the old (or even a
		 * wrong) definition could otherwise crash us.
		 */
		if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		if (expected_tupledesc->natts < NUM_BUFFERCACHE_PAGES_MIN_ELEM ||
			expected_tupledesc->natts > NUM_BUFFERCACHE_PAGES_ELEM)
			elog(ERROR, "incorrect number of output arguments");

		/* Construct a tuple descriptor for the result rows. */
		tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "relfilenode",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reltablespace",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 4, "reldatabase",
						   OIDOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 5, "relforknumber",
						   INT2OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 6, "relblocknumber",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 7, "isdirty",
						   BOOLOID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 8, "usage_count",
						   INT2OID, -1, 0);

		if (expected_tupledesc->natts == NUM_BUFFERCACHE_PAGES_ELEM)
			TupleDescInitEntry(tupledesc, (AttrNumber) 9, "pinning_backends",
							   INT4OID, -1, 0);

		fctx->tupdesc = BlessTupleDesc(tupledesc);

		/* Allocate NBuffers worth of BufferCachePagesRec records. */
		fctx->record = (BufferCachePagesRec *)
			MemoryContextAllocHuge(CurrentMemoryContext,
								   sizeof(BufferCachePagesRec) * NBuffers);

		/* Set max calls and remember the user function context. */
		funcctx->max_calls = NBuffers;
		funcctx->user_fctx = fctx;

		/* Return to original context when allocating transient memory */
		MemoryContextSwitchTo(oldcontext);

		/*
		 * Scan through all the buffers, saving the relevant fields in the
		 * fctx->record structure.
		 *
		 * We don't hold the partition locks, so we don't get a consistent
		 * snapshot across all buffers, but we do grab the buffer header
		 * locks, so the information of each buffer is self-consistent.
		 */
		for (i = 0; i < NBuffers; i++)
		{
			BufferDesc *bufHdr;
			uint32		buf_state;

			CHECK_FOR_INTERRUPTS();

			bufHdr = GetBufferDescriptor(i);
			/* Lock each buffer header before inspecting. */
			buf_state = LockBufHdr(bufHdr);

			fctx->record[i].bufferid = BufferDescriptorGetBuffer(bufHdr);
			fctx->record[i].relfilenumber = BufTagGetRelNumber(&bufHdr->tag);
			fctx->record[i].reltablespace = bufHdr->tag.spcOid;
			fctx->record[i].reldatabase = bufHdr->tag.dbOid;
			fctx->record[i].forknum = BufTagGetForkNum(&bufHdr->tag);
			fctx->record[i].blocknum = bufHdr->tag.blockNum;
			fctx->record[i].usagecount = BUF_STATE_GET_USAGECOUNT(buf_state);
			fctx->record[i].pinning_backends = BUF_STATE_GET_REFCOUNT(buf_state);

			if (buf_state & BM_DIRTY)
				fctx->record[i].isdirty = true;
			else
				fctx->record[i].isdirty = false;

			/* Note if the buffer is valid, and has storage created */
			if ((buf_state & BM_VALID) && (buf_state & BM_TAG_VALID))
				fctx->record[i].isvalid = true;
			else
				fctx->record[i].isvalid = false;

			UnlockBufHdr(bufHdr);
		}
	}

	funcctx = SRF_PERCALL_SETUP();

	/* Get the saved state */
	fctx = funcctx->user_fctx;

	if (funcctx->call_cntr < funcctx->max_calls)
	{
		uint32		i = funcctx->call_cntr;
		Datum		values[NUM_BUFFERCACHE_PAGES_ELEM];
		bool		nulls[NUM_BUFFERCACHE_PAGES_ELEM];

		values[0] = Int32GetDatum(fctx->record[i].bufferid);
		nulls[0] = false;

		/*
		 * Set all fields except the bufferid to null if the buffer is unused
		 * or not valid.
		 */
		if (fctx->record[i].blocknum == InvalidBlockNumber ||
			fctx->record[i].isvalid == false)
		{
			nulls[1] = true;
			nulls[2] = true;
			nulls[3] = true;
			nulls[4] = true;
			nulls[5] = true;
			nulls[6] = true;
			nulls[7] = true;
			/* unused for v1.0 callers, but the array is always long enough */
			nulls[8] = true;
		}
		else
		{
			values[1] = ObjectIdGetDatum(fctx->record[i].relfilenumber);
			nulls[1] = false;
			values[2] = ObjectIdGetDatum(fctx->record[i].reltablespace);
			nulls[2] = false;
			values[3] = ObjectIdGetDatum(fctx->record[i].reldatabase);
			nulls[3] = false;
			values[4] = Int16GetDatum(fctx->record[i].forknum);
			nulls[4] = false;
			values[5] = Int64GetDatum((int64) fctx->record[i].blocknum);
			nulls[5] = false;
			values[6] = BoolGetDatum(fctx->record[i].isdirty);
			nulls[6] = false;
			values[7] = Int16GetDatum(fctx->record[i].usagecount);
			nulls[7] = false;
			/* unused for v1.0 callers, but the array is always long enough */
			values[8] = Int32GetDatum(fctx->record[i].pinning_backends);
			nulls[8] = false;
		}

		/* Build and return the tuple. */
		tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	else
		SRF_RETURN_DONE(funcctx);
}

/*
 * Inquire about OS page mappings for shared buffers, optionally with NUMA
 * information.
 *
 * When "include_numa" is false, this routine ignores everything related to
 * NUMA (those values are returned as NULLs), returning only the mapping
 * between shared buffers and OS pages.
 *
 * When "include_numa" is true, NUMA is initialized and numa_node values are
 * generated. In order to get reliable results we also need to touch memory
 * pages, so that the inquiry about the NUMA memory node does not return -2,
 * which indicates unmapped/unallocated pages.
 *
 * Buffers may be smaller or larger than OS memory pages. For each buffer we
 * return one entry for each memory page used by the buffer (if the buffer is
 * smaller, it only uses a part of one memory page).
 *
 * We expect both sizes (for buffers and memory pages) to be powers of 2, so
 * one is always a multiple of the other.
 */
static Datum
pg_buffercache_os_pages_internal(FunctionCallInfo fcinfo, bool include_numa)
{
	FuncCallContext *funcctx;
	MemoryContext oldcontext;
	BufferCacheOsPagesContext *fctx;	/* User function context. */
	TupleDesc	tupledesc;
	TupleDesc	expected_tupledesc;
	HeapTuple	tuple;
	Datum		result;

	if (SRF_IS_FIRSTCALL())
	{
		int			i,
					idx;
		Size		os_page_size;
		int			pages_per_buffer;
		int		   *os_page_status = NULL;
		uint64		os_page_count = 0;
		int			max_entries;
		char	   *startptr,
				   *endptr;

		/* If NUMA information is requested, initialize NUMA support. */
		if (include_numa && pg_numa_init() == -1)
			elog(ERROR, "libnuma initialization failed or NUMA is not supported on this platform");

		/*
		 * The database block size and OS memory page size are unlikely to be
		 * the same. The block size is 1-32kB, while the memory page size
		 * depends on the platform. On x86 it's usually 4kB, on ARM it's 4kB
		 * or 64kB, and there are also features like THP etc. Moreover, we
		 * don't quite know how the pages and buffers "align" in memory - the
		 * buffers may be shifted in some way, using more memory pages than
		 * necessary.
		 *
		 * So we need to be careful about mapping buffers to memory pages. We
		 * calculate the maximum number of pages a buffer might use, so that
		 * we allocate enough space for the entries. And then we count the
		 * actual number of entries as we scan the buffers.
		 *
		 * This information is needed before calling move_pages() for NUMA
		 * node id inquiry.
		 */
		os_page_size = pg_get_shmem_pagesize();

		/*
		 * The page and block sizes are expected to be powers of 2, so one
		 * divides the other (we don't know in which direction). This does
		 * not say anything about the relative alignment of pages/buffers.
		 */
		Assert((os_page_size % BLCKSZ == 0) || (BLCKSZ % os_page_size == 0));
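
		/*
		 * Worked example (illustrative numbers, not from this file): with
		 * the default BLCKSZ of 8192 and 4kB OS pages, each buffer spans
		 * exactly two OS pages and BLCKSZ % os_page_size == 0 holds; with
		 * 2MB huge pages, 256 buffers fit into one OS page and
		 * os_page_size % BLCKSZ == 0 holds instead.
		 */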

		if (include_numa)
		{
			void	  **os_page_ptrs = NULL;

			/*
			 * How many addresses are we going to query? Simply get the page
			 * for the first buffer and the first page after the last buffer,
			 * and count the pages in between.
			 */
			startptr = (char *) TYPEALIGN_DOWN(os_page_size,
											   BufferGetBlock(1));
			endptr = (char *) TYPEALIGN(os_page_size,
										(char *) BufferGetBlock(NBuffers) + BLCKSZ);
			os_page_count = (endptr - startptr) / os_page_size;
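
			/*
			 * For scale (illustrative numbers, not from this file): with
			 * shared_buffers = 128MB, NBuffers is 16384, so with 8kB blocks
			 * and 4kB OS pages this queries roughly 32768 page addresses
			 * (one more if the buffer array straddles a page boundary).
			 */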

			/* Used to determine the NUMA node for all OS pages at once */
			os_page_ptrs = palloc0(sizeof(void *) * os_page_count);
			os_page_status = palloc(sizeof(uint64) * os_page_count);

			/*
			 * Fill pointers for all the memory pages. This loop stores and
			 * touches (if needed) addresses into os_page_ptrs[] as input to
			 * one big move_pages(2) inquiry system call, as done in
			 * pg_numa_query_pages().
			 */
			idx = 0;
			for (char *ptr = startptr; ptr < endptr; ptr += os_page_size)
			{
				os_page_ptrs[idx++] = ptr;

				/* Only need to touch memory once per backend process lifetime */
				if (firstNumaTouch)
					pg_numa_touch_mem_if_required(ptr);
			}

			Assert(idx == os_page_count);

			elog(DEBUG1, "NUMA: NBuffers=%d os_page_count=" UINT64_FORMAT " "
				 "os_page_size=%zu", NBuffers, os_page_count, os_page_size);

			/*
			 * If we ever get 0xff back from the kernel inquiry, then we
			 * probably have a bug in our buffer-to-OS-page mapping code
			 * here.
			 */
			memset(os_page_status, 0xff, sizeof(int) * os_page_count);

			/* Query NUMA status for all the pointers */
			if (pg_numa_query_pages(0, os_page_count, os_page_ptrs, os_page_status) == -1)
				elog(ERROR, "failed NUMA pages inquiry: %m");
		}

		/* Initialize the multi-call context, load entries about buffers */

		funcctx = SRF_FIRSTCALL_INIT();

		/* Switch context when allocating stuff to be used in later calls */
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		/* Create a user function context for cross-call persistence */
		fctx = (BufferCacheOsPagesContext *) palloc(sizeof(BufferCacheOsPagesContext));

		if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		if (expected_tupledesc->natts != NUM_BUFFERCACHE_OS_PAGES_ELEM)
			elog(ERROR, "incorrect number of output arguments");

		/* Construct a tuple descriptor for the result rows. */
		tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
		TupleDescInitEntry(tupledesc, (AttrNumber) 1, "bufferid",
						   INT4OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 2, "os_page_num",
						   INT8OID, -1, 0);
		TupleDescInitEntry(tupledesc, (AttrNumber) 3, "numa_node",
						   INT4OID, -1, 0);

		fctx->tupdesc = BlessTupleDesc(tupledesc);
		fctx->include_numa = include_numa;

		/*
		 * Each buffer needs at least one entry, but it might be offset in
		 * some way, and use one extra entry. So we allocate space for the
		 * maximum number of entries we might need, and then count the exact
		 * number as we're walking buffers. That way we can do it in one
		 * pass, without reallocating memory.
		 */
		pages_per_buffer = Max(1, BLCKSZ / os_page_size) + 1;
		max_entries = NBuffers * pages_per_buffer;
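
		/*
		 * Worked example (illustrative numbers, not from this file): with
		 * 8kB blocks and 4kB OS pages, pages_per_buffer = 8192/4096 + 1 = 3,
		 * allowing for a buffer that straddles an extra page; with 2MB huge
		 * pages the integer division yields 0, so pages_per_buffer =
		 * Max(1, 0) + 1 = 2, since an unaligned buffer can still cross a
		 * huge-page boundary.
		 */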

		/* Allocate entries for BufferCacheOsPagesRec records. */
		fctx->record = (BufferCacheOsPagesRec *)
			MemoryContextAllocHuge(CurrentMemoryContext,
								   sizeof(BufferCacheOsPagesRec) * max_entries);

		/* Return to original context when allocating transient memory */
		MemoryContextSwitchTo(oldcontext);

		if (include_numa && firstNumaTouch)
			elog(DEBUG1, "NUMA: page-faulting the buffercache for proper NUMA readouts");

		/*
		 * Scan through all the buffers, saving the relevant fields in the
		 * fctx->record structure.
		 *
		 * We don't hold the partition locks, so we don't get a consistent
		 * snapshot across all buffers, but we do grab the buffer header
		 * locks, so the information of each buffer is self-consistent.
		 */
		startptr = (char *) TYPEALIGN_DOWN(os_page_size, (char *) BufferGetBlock(1));
		idx = 0;
		for (i = 0; i < NBuffers; i++)
		{
			char	   *buffptr = (char *) BufferGetBlock(i + 1);
			BufferDesc *bufHdr;
			uint32		bufferid;
			int32		page_num;
			char	   *startptr_buff,
					   *endptr_buff;

			CHECK_FOR_INTERRUPTS();

			bufHdr = GetBufferDescriptor(i);

			/* Lock each buffer header before inspecting. */
			LockBufHdr(bufHdr);
			bufferid = BufferDescriptorGetBuffer(bufHdr);
			UnlockBufHdr(bufHdr);

			/* start of the first page of this buffer */
			startptr_buff = (char *) TYPEALIGN_DOWN(os_page_size, buffptr);

			/* end of the buffer (no need to align to memory page) */
			endptr_buff = buffptr + BLCKSZ;

			Assert(startptr_buff < endptr_buff);

			/* calculate ID of the first page for this buffer */
			page_num = (startptr_buff - startptr) / os_page_size;
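
			/*
			 * Illustrative example (assumed numbers, not from this file):
			 * with 8kB blocks, 4kB OS pages, and page-aligned buffers, the
			 * third buffer (i = 2) starts 16384 bytes past startptr, so
			 * page_num = 16384/4096 = 4 and the loop below emits entries
			 * for OS pages 4 and 5.
			 */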

			/* Add an entry for each OS page overlapping with this buffer. */
			for (char *ptr = startptr_buff; ptr < endptr_buff; ptr += os_page_size)
			{
				fctx->record[idx].bufferid = bufferid;
				fctx->record[idx].page_num = page_num;
				fctx->record[idx].numa_node = include_numa ? os_page_status[page_num] : -1;

				/* advance to the next entry/page */
				++idx;
				++page_num;
			}
		}

		Assert(idx <= max_entries);

		if (include_numa)
			Assert(idx >= os_page_count);

		/* Set max calls and remember the user function context. */
		funcctx->max_calls = idx;
		funcctx->user_fctx = fctx;

		/* Remember this backend touched the pages (only relevant for NUMA) */
		if (include_numa)
			firstNumaTouch = false;
	}

	funcctx = SRF_PERCALL_SETUP();

	/* Get the saved state */
	fctx = funcctx->user_fctx;

	if (funcctx->call_cntr < funcctx->max_calls)
	{
		uint32		i = funcctx->call_cntr;
		Datum		values[NUM_BUFFERCACHE_OS_PAGES_ELEM];
		bool		nulls[NUM_BUFFERCACHE_OS_PAGES_ELEM];

		values[0] = Int32GetDatum(fctx->record[i].bufferid);
		nulls[0] = false;

		values[1] = Int64GetDatum(fctx->record[i].page_num);
		nulls[1] = false;

		if (fctx->include_numa)
		{
			values[2] = Int32GetDatum(fctx->record[i].numa_node);
			nulls[2] = false;
		}
		else
		{
			values[2] = (Datum) 0;
			nulls[2] = true;
		}

		/* Build and return the tuple. */
		tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}
	else
		SRF_RETURN_DONE(funcctx);
}

/*
 * pg_buffercache_os_pages
 *
 * Retrieve information about OS pages, with or without NUMA information.
 */
Datum
pg_buffercache_os_pages(PG_FUNCTION_ARGS)
{
	bool		include_numa;

	/* Get the boolean parameter that controls the NUMA behavior. */
	include_numa = PG_GETARG_BOOL(0);

	return pg_buffercache_os_pages_internal(fcinfo, include_numa);
}

/* Backward-compatible wrapper for v1.6. */
Datum
pg_buffercache_numa_pages(PG_FUNCTION_ARGS)
{
	/* Call the internal function with include_numa = true */
	return pg_buffercache_os_pages_internal(fcinfo, true);
}

Datum
pg_buffercache_summary(PG_FUNCTION_ARGS)
{
	Datum		result;
	TupleDesc	tupledesc;
	HeapTuple	tuple;
	Datum		values[NUM_BUFFERCACHE_SUMMARY_ELEM];
	bool		nulls[NUM_BUFFERCACHE_SUMMARY_ELEM];

	int32		buffers_used = 0;
	int32		buffers_unused = 0;
	int32		buffers_dirty = 0;
	int32		buffers_pinned = 0;
	int64		usagecount_total = 0;

	if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	for (int i = 0; i < NBuffers; i++)
	{
		BufferDesc *bufHdr;
		uint32		buf_state;

		CHECK_FOR_INTERRUPTS();

		/*
		 * This function summarizes the state of all headers. Locking the
		 * buffer headers wouldn't provide an improved result as the state of
		 * the buffer can still change after we release the lock, and it'd
		 * noticeably increase the cost of the function.
		 */
		bufHdr = GetBufferDescriptor(i);
		buf_state = pg_atomic_read_u32(&bufHdr->state);

		if (buf_state & BM_VALID)
		{
			buffers_used++;
			usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);

			if (buf_state & BM_DIRTY)
				buffers_dirty++;
		}
		else
			buffers_unused++;

		if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
			buffers_pinned++;
	}

	memset(nulls, 0, sizeof(nulls));
	values[0] = Int32GetDatum(buffers_used);
	values[1] = Int32GetDatum(buffers_unused);
	values[2] = Int32GetDatum(buffers_dirty);
	values[3] = Int32GetDatum(buffers_pinned);

	if (buffers_used != 0)
		values[4] = Float8GetDatum((double) usagecount_total / buffers_used);
	else
		nulls[4] = true;

	/* Build and return the tuple. */
	tuple = heap_form_tuple(tupledesc, values, nulls);
	result = HeapTupleGetDatum(tuple);

	PG_RETURN_DATUM(result);
}

Datum
pg_buffercache_usage_counts(PG_FUNCTION_ARGS)
{
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	int			usage_counts[BM_MAX_USAGE_COUNT + 1] = {0};
	int			dirty[BM_MAX_USAGE_COUNT + 1] = {0};
	int			pinned[BM_MAX_USAGE_COUNT + 1] = {0};
	Datum		values[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM];
	bool		nulls[NUM_BUFFERCACHE_USAGE_COUNTS_ELEM] = {0};

	InitMaterializedSRF(fcinfo, 0);

	for (int i = 0; i < NBuffers; i++)
	{
		BufferDesc *bufHdr = GetBufferDescriptor(i);
		uint32		buf_state = pg_atomic_read_u32(&bufHdr->state);
		int			usage_count;

		CHECK_FOR_INTERRUPTS();

		usage_count = BUF_STATE_GET_USAGECOUNT(buf_state);
		usage_counts[usage_count]++;

		if (buf_state & BM_DIRTY)
			dirty[usage_count]++;

		if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
			pinned[usage_count]++;
	}

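	/*
	 * Emit one row per possible usage count. With the stock definition of
	 * BM_MAX_USAGE_COUNT (5, per buf_internals.h), that is six rows, for
	 * usage counts 0 through 5.
	 */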
	for (int i = 0; i < BM_MAX_USAGE_COUNT + 1; i++)
	{
		values[0] = Int32GetDatum(i);
		values[1] = Int32GetDatum(usage_counts[i]);
		values[2] = Int32GetDatum(dirty[i]);
		values[3] = Int32GetDatum(pinned[i]);

		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
	}

	return (Datum) 0;
}

/*
 * Helper function to check if the user has superuser privileges.
 */
static void
pg_buffercache_superuser_check(char *func_name)
{
	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to use %s()",
						func_name)));
}

/*
 * Try to evict a shared buffer.
 */
Datum
pg_buffercache_evict(PG_FUNCTION_ARGS)
{
	Datum		result;
	TupleDesc	tupledesc;
	HeapTuple	tuple;
	Datum		values[NUM_BUFFERCACHE_EVICT_ELEM];
	bool		nulls[NUM_BUFFERCACHE_EVICT_ELEM] = {0};

	Buffer		buf = PG_GETARG_INT32(0);
	bool		buffer_flushed;

	if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	pg_buffercache_superuser_check("pg_buffercache_evict");

	if (buf < 1 || buf > NBuffers)
		elog(ERROR, "bad buffer ID: %d", buf);

	values[0] = BoolGetDatum(EvictUnpinnedBuffer(buf, &buffer_flushed));
	values[1] = BoolGetDatum(buffer_flushed);

	tuple = heap_form_tuple(tupledesc, values, nulls);
	result = HeapTupleGetDatum(tuple);

	PG_RETURN_DATUM(result);
}

/*
 * Try to evict the specified relation.
 */
Datum
pg_buffercache_evict_relation(PG_FUNCTION_ARGS)
{
	Datum		result;
	TupleDesc	tupledesc;
	HeapTuple	tuple;
	Datum		values[NUM_BUFFERCACHE_EVICT_RELATION_ELEM];
	bool		nulls[NUM_BUFFERCACHE_EVICT_RELATION_ELEM] = {0};

	Oid			relOid;
	Relation	rel;

	int32		buffers_evicted = 0;
	int32		buffers_flushed = 0;
	int32		buffers_skipped = 0;

	if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	pg_buffercache_superuser_check("pg_buffercache_evict_relation");

	relOid = PG_GETARG_OID(0);

	rel = relation_open(relOid, AccessShareLock);

	if (RelationUsesLocalBuffers(rel))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("relation uses local buffers, %s() is intended to be used for shared buffers only",
						"pg_buffercache_evict_relation")));

	EvictRelUnpinnedBuffers(rel, &buffers_evicted, &buffers_flushed,
							&buffers_skipped);

	relation_close(rel, AccessShareLock);

	values[0] = Int32GetDatum(buffers_evicted);
	values[1] = Int32GetDatum(buffers_flushed);
	values[2] = Int32GetDatum(buffers_skipped);

	tuple = heap_form_tuple(tupledesc, values, nulls);
	result = HeapTupleGetDatum(tuple);

	PG_RETURN_DATUM(result);
}


/*
 * Try to evict all shared buffers.
 */
Datum
pg_buffercache_evict_all(PG_FUNCTION_ARGS)
{
	Datum		result;
	TupleDesc	tupledesc;
	HeapTuple	tuple;
	Datum		values[NUM_BUFFERCACHE_EVICT_ALL_ELEM];
	bool		nulls[NUM_BUFFERCACHE_EVICT_ALL_ELEM] = {0};

	int32		buffers_evicted = 0;
	int32		buffers_flushed = 0;
	int32		buffers_skipped = 0;

	if (get_call_result_type(fcinfo, NULL, &tupledesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	pg_buffercache_superuser_check("pg_buffercache_evict_all");

	EvictAllUnpinnedBuffers(&buffers_evicted, &buffers_flushed,
							&buffers_skipped);

	values[0] = Int32GetDatum(buffers_evicted);
	values[1] = Int32GetDatum(buffers_flushed);
	values[2] = Int32GetDatum(buffers_skipped);

	tuple = heap_form_tuple(tupledesc, values, nulls);
	result = HeapTupleGetDatum(tuple);

	PG_RETURN_DATUM(result);
}
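
/*
 * Usage sketch (not part of this file; assumes the extension is installed
 * and, for the eviction functions, a superuser). The exact SQL-level
 * signatures are defined by the extension's SQL scripts, not here:
 *
 *   CREATE EXTENSION pg_buffercache;
 *   SELECT * FROM pg_buffercache;                 -- per-buffer details
 *   SELECT * FROM pg_buffercache_summary();
 *   SELECT * FROM pg_buffercache_usage_counts();
 *   SELECT * FROM pg_buffercache_evict(1);
 */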