Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ jobs:
steps:
- name: Install Postgres Build Dependencies
run: |
sudo apt-get update
sudo apt-get install build-essential coreutils libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc git

- name: Checkout postgres repository
Expand Down
60 changes: 41 additions & 19 deletions contrib/pg_prewarm/pg_prewarm.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <sys/stat.h>
#include <unistd.h>

#include "access/heapam.h"
#include "access/relation.h"
#include "fmgr.h"
#include "miscadmin.h"
Expand Down Expand Up @@ -185,29 +186,50 @@ pg_prewarm(PG_FUNCTION_ARGS)
}
else if (ptype == PREWARM_BUFFER)
{
BlockNumber prefetch_block = first_block;
Oid nspOid;
int io_concurrency;
if (get_relkind_objtype(rel->rd_rel->relkind) == OBJECT_TABLE && forkNumber == MAIN_FORKNUM)
{
uint32 scan_flags = SO_TYPE_SEQSCAN | SO_TEMP_SNAPSHOT;
HeapTuple tuple;
Snapshot snapshot;
TableScanDesc scan;
elog(LOG, "pg_prewarm: SeqScan relation \"%s\" starting %ld for %ld blocks", RelationGetRelationName(rel), first_block, last_block - first_block + 1);
// Use heap scan to set hint bits on every tuple. SO_ALLOW_PAGEMODE is intentionally NOT SET.
// Otherwise, when a page is all visible, tuple hint bits won't be set.
snapshot = RegisterSnapshot(GetTransactionSnapshot());
scan = heap_beginscan(rel, snapshot, 0, NULL, NULL, scan_flags);
heap_setscanlimits(scan, first_block, last_block - first_block + 1);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
CHECK_FOR_INTERRUPTS();
}
heap_endscan(scan);
}
else
{
BlockNumber prefetch_block = first_block;
Oid nspOid;
int io_concurrency;

nspOid = rel->rd_rel->reltablespace;
io_concurrency = get_tablespace_maintenance_io_concurrency(nspOid);
nspOid = rel->rd_rel->reltablespace;
io_concurrency = get_tablespace_maintenance_io_concurrency(nspOid);

/*
* In buffer mode, we actually pull the data into shared_buffers.
*/
for (block = first_block; block <= last_block; ++block)
{
Buffer buf;
BlockNumber prefetch_stop = block + Min(last_block - block + 1,
io_concurrency);
CHECK_FOR_INTERRUPTS();
while (prefetch_block < prefetch_stop)
/*
* In buffer mode, we actually pull the data into shared_buffers.
*/
for (block = first_block; block <= last_block; ++block)
{
PrefetchBuffer(rel, forkNumber, prefetch_block++);
Buffer buf;
BlockNumber prefetch_stop = block + Min(last_block - block + 1,
io_concurrency);
CHECK_FOR_INTERRUPTS();
while (prefetch_block < prefetch_stop)
{
PrefetchBuffer(rel, forkNumber, prefetch_block++);
}
buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL);
ReleaseBuffer(buf);
++blocks_done;
}
buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL);
ReleaseBuffer(buf);
++blocks_done;
}
}

Expand Down
49 changes: 29 additions & 20 deletions src/backend/access/nbtree/nbtinsert.c
Original file line number Diff line number Diff line change
Expand Up @@ -1472,6 +1472,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
Page origpage;
Page leftpage,
rightpage;
PGAlignedBlock leftpage_buf,
rightpage_buf;
BlockNumber origpagenumber,
rightpagenumber;
BTPageOpaque ropaque,
Expand Down Expand Up @@ -1545,8 +1547,8 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
firstrightoff = _bt_findsplitloc(rel, origpage, newitemoff, newitemsz,
newitem, &newitemonleft);

/* Allocate temp buffer for leftpage */
leftpage = PageGetTempPage(origpage);
/* Use temporary buffer for leftpage */
leftpage = leftpage_buf.data;
_bt_pageinit(leftpage, BufferGetPageSize(buf));
lopaque = BTPageGetOpaque(leftpage);

Expand Down Expand Up @@ -1709,19 +1711,23 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,

/*
* Acquire a new right page to split into, now that left page has a new
* high key. From here on, it's not okay to throw an error without
* zeroing rightpage first. This coding rule ensures that we won't
* confuse future VACUUM operations, which might otherwise try to re-find
* a downlink to a leftover junk page as the page undergoes deletion.
* high key.
*
* It would be reasonable to start the critical section just after the new
* rightpage buffer is acquired instead; that would allow us to avoid
* leftover junk pages without bothering to zero rightpage. We do it this
* way because it avoids an unnecessary PANIC when either origpage or its
* existing sibling page are corrupt.
* To not confuse future VACUUM operations, we zero the right page and
* work on an in-memory copy of it before writing WAL, then copy its
* contents back to the actual page once we start the critical section
* work. This simplifies the split work, so as there is no need to zero
* the right page before throwing an error.
*/
rbuf = _bt_allocbuf(rel, heaprel);
rightpage = BufferGetPage(rbuf);
rightpage = rightpage_buf.data;

/*
* Copy the contents of the right page into its temporary location, and
* zero the original space.
*/
memcpy(rightpage, BufferGetPage(rbuf), BLCKSZ);
memset(BufferGetPage(rbuf), 0, BLCKSZ);
rightpagenumber = BufferGetBlockNumber(rbuf);
/* rightpage was initialized by _bt_getbuf */
ropaque = BTPageGetOpaque(rightpage);
Expand Down Expand Up @@ -1770,7 +1776,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (PageAddItem(rightpage, (Item) righthighkey, itemsz, afterrightoff,
false, false) == InvalidOffsetNumber)
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add high key to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand Down Expand Up @@ -1818,7 +1823,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(leftpage, newitemsz, newitem, afterleftoff,
false))
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the left sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand All @@ -1831,7 +1835,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz, newitem, afterrightoff,
afterrightoff == minusinfoff))
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand All @@ -1845,7 +1848,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
{
if (!_bt_pgaddtup(leftpage, itemsz, dataitem, afterleftoff, false))
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the left sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand All @@ -1857,7 +1859,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, itemsz, dataitem, afterrightoff,
afterrightoff == minusinfoff))
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add old item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand All @@ -1878,7 +1879,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
if (!_bt_pgaddtup(rightpage, newitemsz, newitem, afterrightoff,
afterrightoff == minusinfoff))
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
elog(ERROR, "failed to add new item to the right sibling"
" while splitting block %u of index \"%s\"",
origpagenumber, RelationGetRelationName(rel));
Expand All @@ -1898,7 +1898,6 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
sopaque = BTPageGetOpaque(spage);
if (sopaque->btpo_prev != origpagenumber)
{
memset(rightpage, 0, BufferGetPageSize(rbuf));
ereport(ERROR,
(errcode(ERRCODE_INDEX_CORRUPTED),
errmsg_internal("right sibling's left-link doesn't match: "
Expand Down Expand Up @@ -1941,9 +1940,19 @@ _bt_split(Relation rel, Relation heaprel, BTScanInsert itup_key, Buffer buf,
* original. We need to do this before writing the WAL record, so that
* XLogInsert can WAL log an image of the page if necessary.
*/
PageRestoreTempPage(leftpage, origpage);
memcpy(origpage, leftpage, BLCKSZ);
/* leftpage, lopaque must not be used below here */

/*
* Move the contents of the right page from its temporary location to the
* destination buffer, before writing the WAL record. Unlike the left
* page, the right page and its opaque area are still needed to complete
* the update of the page, so reinitialize them.
*/
rightpage = BufferGetPage(rbuf);
memcpy(rightpage, rightpage_buf.data, BLCKSZ);
ropaque = BTPageGetOpaque(rightpage);

MarkBufferDirty(buf);
MarkBufferDirty(rbuf);

Expand Down