Index: contrib/pg_buffercache/README.pg_buffercache =================================================================== *** contrib/pg_buffercache/README.pg_buffercache (revision 1) --- contrib/pg_buffercache/README.pg_buffercache (working copy) *************** *** 40,46 **** reldatabase | pg_database.oid | Database for the relation. relblocknumber | | Offset of the page in the relation. isdirty | | Is the page dirty? ! There is one row for each buffer in the shared cache. Unused buffers are shown with all fields null except bufferid. --- 40,46 ---- reldatabase | pg_database.oid | Database for the relation. relblocknumber | | Offset of the page in the relation. isdirty | | Is the page dirty? ! usagecount | | Page LRU count There is one row for each buffer in the shared cache. Unused buffers are shown with all fields null except bufferid. *************** *** 60,79 **** regression=# \d pg_buffercache; View "public.pg_buffercache" ! Column | Type | Modifiers ! ----------------+---------+----------- ! bufferid | integer | ! relfilenode | oid | ! reltablespace | oid | ! reldatabase | oid | ! relblocknumber | bigint | ! isdirty | boolean | View definition: SELECT p.bufferid, p.relfilenode, p.reltablespace, p.reldatabase, ! p.relblocknumber, p.isdirty FROM pg_buffercache_pages() p(bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber bigint, ! isdirty boolean); regression=# SELECT c.relname, count(*) AS buffers FROM pg_class c INNER JOIN pg_buffercache b --- 60,81 ---- regression=# \d pg_buffercache; View "public.pg_buffercache" ! Column | Type | Modifiers ! ----------------+----------+----------- ! bufferid | integer | ! relfilenode | oid | ! reltablespace | oid | ! reldatabase | oid | ! relblocknumber | bigint | ! isdirty | boolean | ! usagecount | smallint | ! View definition: SELECT p.bufferid, p.relfilenode, p.reltablespace, p.reldatabase, ! 
p.relblocknumber, p.isdirty, p.usagecount FROM pg_buffercache_pages() p(bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, relblocknumber bigint, ! isdirty boolean, usagecount smallint); regression=# SELECT c.relname, count(*) AS buffers FROM pg_class c INNER JOIN pg_buffercache b Index: contrib/pg_buffercache/pg_buffercache_pages.c =================================================================== *** contrib/pg_buffercache/pg_buffercache_pages.c (revision 1) --- contrib/pg_buffercache/pg_buffercache_pages.c (working copy) *************** *** 16,22 **** #include "utils/relcache.h" ! #define NUM_BUFFERCACHE_PAGES_ELEM 6 PG_MODULE_MAGIC; --- 16,22 ---- #include "utils/relcache.h" ! #define NUM_BUFFERCACHE_PAGES_ELEM 7 PG_MODULE_MAGIC; *************** *** 35,40 **** --- 35,41 ---- BlockNumber blocknum; bool isvalid; bool isdirty; + uint16 usagecount; } BufferCachePagesRec; *************** *** 91,96 **** --- 92,99 ---- INT8OID, -1, 0); TupleDescInitEntry(tupledesc, (AttrNumber) 6, "isdirty", BOOLOID, -1, 0); + TupleDescInitEntry(tupledesc, (AttrNumber) 7, "usage_count", + INT2OID, -1, 0); fctx->tupdesc = BlessTupleDesc(tupledesc); *************** *** 126,131 **** --- 129,135 ---- fctx->record[i].reltablespace = bufHdr->tag.rnode.spcNode; fctx->record[i].reldatabase = bufHdr->tag.rnode.dbNode; fctx->record[i].blocknum = bufHdr->tag.blockNum; + fctx->record[i].usagecount = bufHdr->usage_count; if (bufHdr->flags & BM_DIRTY) fctx->record[i].isdirty = true; *************** *** 172,177 **** --- 176,182 ---- nulls[3] = true; nulls[4] = true; nulls[5] = true; + nulls[6] = true; } else { *************** *** 185,190 **** --- 190,197 ---- nulls[4] = false; values[5] = BoolGetDatum(fctx->record[i].isdirty); nulls[5] = false; + values[6] = Int16GetDatum(fctx->record[i].usagecount); + nulls[6] = false; } /* Build and return the tuple. 
*/ Index: contrib/pg_buffercache/pg_buffercache.sql.in =================================================================== *** contrib/pg_buffercache/pg_buffercache.sql.in (revision 1) --- contrib/pg_buffercache/pg_buffercache.sql.in (working copy) *************** *** 12,18 **** CREATE VIEW pg_buffercache AS SELECT P.* FROM pg_buffercache_pages() AS P (bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, ! relblocknumber int8, isdirty bool); -- Don't want these to be available at public. REVOKE ALL ON FUNCTION pg_buffercache_pages() FROM PUBLIC; --- 12,18 ---- CREATE VIEW pg_buffercache AS SELECT P.* FROM pg_buffercache_pages() AS P (bufferid integer, relfilenode oid, reltablespace oid, reldatabase oid, ! relblocknumber int8, isdirty bool, usagecount int2); -- Don't want these to be available at public. REVOKE ALL ON FUNCTION pg_buffercache_pages() FROM PUBLIC; Index: contrib/pgbench/README.pgbench =================================================================== *** contrib/pgbench/README.pgbench (revision 1) --- contrib/pgbench/README.pgbench (working copy) *************** *** 94,103 **** default is 1. NOTE: scaling factor should be at least as large as the largest number of clients you intend to test; else you'll mostly be measuring update contention. -D varname=value ! Define a variable. It can be refereed to by a script ! provided by using -f option. Multile -D options are allowed. -U login Specify db user's login name if it is different from --- 94,107 ---- default is 1. NOTE: scaling factor should be at least as large as the largest number of clients you intend to test; else you'll mostly be measuring update contention. + Regular (not initializing) runs using one of the + built-in tests will detect scale based on the number of + branches in the database. For custom (-f) runs it can + be manually specified with this parameter. -D varname=value ! Define a variable. It can be referred to by a script ! provided by using -f option. 
Multiple -D options are allowed. -U login Specify db user's login name if it is different from *************** *** 139,147 **** with the name "pgbench_log.xxx", where xxx is the PID of the pgbench process. The format of the log is: ! client_id transaction_no time ! where time is measured in microseconds. -d debug option. --- 143,157 ---- with the name "pgbench_log.xxx", where xxx is the PID of the pgbench process. The format of the log is: ! client_id transaction_no time file_no time-epoch time-us ! where time is measured in microseconds, the file_no is ! which test file was used (useful when multiple were ! specified with -f), and time-epoch/time-us are a ! UNIX epoch format timestamp followed by an offset ! in microseconds (suitable for creating an ISO 8601 ! timestamp with a fraction of a second) of when ! the transaction completed. -d debug option. *************** *** 163,168 **** --- 173,180 ---- (7) end; + If you specify -N, (4) and (5) aren't included in the transaction. + o -f option This supports for reading transaction script from a specified Index: contrib/pgbench/pgbench.c =================================================================== *** contrib/pgbench/pgbench.c (revision 1) --- contrib/pgbench/pgbench.c (working copy) *************** *** 455,461 **** diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 + (int) (now.tv_usec - st->txn_begin.tv_usec); ! fprintf(LOGFILE, "%d %d %.0f\n", st->id, st->cnt, diff); } if (commands[st->state]->type == SQL_COMMAND) --- 455,462 ---- diff = (int) (now.tv_sec - st->txn_begin.tv_sec) * 1000000.0 + (int) (now.tv_usec - st->txn_begin.tv_usec); ! fprintf(LOGFILE, "%d %d %.0f %d %ld %ld\n", ! 
st->id, st->cnt, diff, st->use_file, now.tv_sec,now.tv_usec); } if (commands[st->state]->type == SQL_COMMAND) Index: src/test/regress/expected/rules.out =================================================================== *** src/test/regress/expected/rules.out (revision 1) --- src/test/regress/expected/rules.out (working copy) *************** *** 1290,1295 **** --- 1290,1296 ---- pg_stat_activity | SELECT d.oid AS datid, d.datname, pg_stat_get_backend_pid(s.backendid) AS procpid, pg_stat_get_backend_userid(s.backendid) AS usesysid, u.rolname AS usename, pg_stat_get_backend_activity(s.backendid) AS current_query, pg_stat_get_backend_waiting(s.backendid) AS waiting, pg_stat_get_backend_activity_start(s.backendid) AS query_start, pg_stat_get_backend_start(s.backendid) AS backend_start, pg_stat_get_backend_client_addr(s.backendid) AS client_addr, pg_stat_get_backend_client_port(s.backendid) AS client_port FROM pg_database d, (SELECT pg_stat_get_backend_idset() AS backendid) s, pg_authid u WHERE ((pg_stat_get_backend_dbid(s.backendid) = d.oid) AND (pg_stat_get_backend_userid(s.backendid) = u.oid)); pg_stat_all_indexes | SELECT c.oid AS relid, i.oid AS indexrelid, n.nspname AS schemaname, c.relname, i.relname AS indexrelname, pg_stat_get_numscans(i.oid) AS idx_scan, pg_stat_get_tuples_returned(i.oid) AS idx_tup_read, pg_stat_get_tuples_fetched(i.oid) AS idx_tup_fetch FROM (((pg_class c JOIN pg_index x ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])); pg_stat_all_tables | SELECT c.oid AS relid, n.nspname AS schemaname, c.relname, pg_stat_get_numscans(c.oid) AS seq_scan, pg_stat_get_tuples_returned(c.oid) AS seq_tup_read, (sum(pg_stat_get_numscans(i.indexrelid)))::bigint AS idx_scan, ((sum(pg_stat_get_tuples_fetched(i.indexrelid)))::bigint + pg_stat_get_tuples_fetched(c.oid)) AS idx_tup_fetch, pg_stat_get_tuples_inserted(c.oid) AS 
n_tup_ins, pg_stat_get_tuples_updated(c.oid) AS n_tup_upd, pg_stat_get_tuples_deleted(c.oid) AS n_tup_del, pg_stat_get_last_vacuum_time(c.oid) AS last_vacuum, pg_stat_get_last_autovacuum_time(c.oid) AS last_autovacuum, pg_stat_get_last_analyze_time(c.oid) AS last_analyze, pg_stat_get_last_autoanalyze_time(c.oid) AS last_autoanalyze FROM ((pg_class c LEFT JOIN pg_index i ON ((c.oid = i.indrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (c.relkind = ANY (ARRAY['r'::"char", 't'::"char"])) GROUP BY c.oid, n.nspname, c.relname; + pg_stat_bgwriter | SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, pg_stat_get_bgwriter_buf_written_lru() AS buffers_lru, pg_stat_get_bgwriter_buf_written_all() AS buffers_all, pg_stat_get_bgwriter_maxwritten_lru() AS maxwritten_lru, pg_stat_get_bgwriter_maxwritten_all() AS maxwritten_all, pg_stat_get_bgwriter_buf_written_backend() AS buffers_backend, pg_stat_get_bgwriter_buf_alloc() AS buffers_alloc; pg_stat_database | SELECT d.oid AS datid, d.datname, pg_stat_get_db_numbackends(d.oid) AS numbackends, pg_stat_get_db_xact_commit(d.oid) AS xact_commit, pg_stat_get_db_xact_rollback(d.oid) AS xact_rollback, (pg_stat_get_db_blocks_fetched(d.oid) - pg_stat_get_db_blocks_hit(d.oid)) AS blks_read, pg_stat_get_db_blocks_hit(d.oid) AS blks_hit FROM pg_database d; pg_stat_sys_indexes | SELECT pg_stat_all_indexes.relid, pg_stat_all_indexes.indexrelid, pg_stat_all_indexes.schemaname, pg_stat_all_indexes.relname, pg_stat_all_indexes.indexrelname, pg_stat_all_indexes.idx_scan, pg_stat_all_indexes.idx_tup_read, pg_stat_all_indexes.idx_tup_fetch FROM pg_stat_all_indexes WHERE (pg_stat_all_indexes.schemaname = ANY (ARRAY['pg_catalog'::name, 'pg_toast'::name, 'information_schema'::name])); pg_stat_sys_tables | SELECT pg_stat_all_tables.relid, pg_stat_all_tables.schemaname, 
pg_stat_all_tables.relname, pg_stat_all_tables.seq_scan, pg_stat_all_tables.seq_tup_read, pg_stat_all_tables.idx_scan, pg_stat_all_tables.idx_tup_fetch, pg_stat_all_tables.n_tup_ins, pg_stat_all_tables.n_tup_upd, pg_stat_all_tables.n_tup_del, pg_stat_all_tables.last_vacuum, pg_stat_all_tables.last_autovacuum, pg_stat_all_tables.last_analyze, pg_stat_all_tables.last_autoanalyze FROM pg_stat_all_tables WHERE (pg_stat_all_tables.schemaname = ANY (ARRAY['pg_catalog'::name, 'pg_toast'::name, 'information_schema'::name])); Index: src/include/storage/buf_internals.h =================================================================== *** src/include/storage/buf_internals.h (revision 1) --- src/include/storage/buf_internals.h (working copy) *************** *** 186,192 **** /* freelist.c */ extern volatile BufferDesc *StrategyGetBuffer(void); extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head); ! extern int StrategySyncStart(void); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); --- 186,193 ---- /* freelist.c */ extern volatile BufferDesc *StrategyGetBuffer(void); extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head); ! extern int StrategySyncStart(int *num_buf_alloc, int *num_backend_writes); ! extern void StrategyReportWrite(void); extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); Index: src/include/storage/bufmgr.h =================================================================== *** src/include/storage/bufmgr.h (revision 1) --- src/include/storage/bufmgr.h (working copy) *************** *** 150,156 **** extern void AbortBufferIO(void); extern void BufmgrCommit(void); ! extern void BufferSync(void); extern void BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); --- 150,156 ---- extern void AbortBufferIO(void); extern void BufmgrCommit(void); ! 
extern int BufferSync(void); extern void BgBufferSync(void); extern void AtProcExit_LocalBuffers(void); Index: src/include/pgstat.h =================================================================== *** src/include/pgstat.h (revision 1) --- src/include/pgstat.h (working copy) *************** *** 30,36 **** PGSTAT_MTYPE_RESETCOUNTER, PGSTAT_MTYPE_AUTOVAC_START, PGSTAT_MTYPE_VACUUM, ! PGSTAT_MTYPE_ANALYZE } StatMsgType; /* ---------- --- 30,37 ---- PGSTAT_MTYPE_RESETCOUNTER, PGSTAT_MTYPE_AUTOVAC_START, PGSTAT_MTYPE_VACUUM, ! PGSTAT_MTYPE_ANALYZE, ! PGSTAT_MTYPE_BGWRITER } StatMsgType; /* ---------- *************** *** 213,218 **** --- 214,238 ---- /* ---------- + * PgStat_MsgBgWriter Sent by the bgwriter to update statistics. + * ---------- + */ + typedef struct PgStat_MsgBgWriter + { + PgStat_MsgHdr m_hdr; + + PgStat_Counter m_timed_checkpoints; + PgStat_Counter m_requested_checkpoints; + PgStat_Counter m_buf_written_checkpoints; + PgStat_Counter m_buf_written_lru; + PgStat_Counter m_buf_written_all; + PgStat_Counter m_maxwritten_lru; + PgStat_Counter m_maxwritten_all; + PgStat_Counter m_buf_written_backend; + PgStat_Counter m_buf_alloc; + } PgStat_MsgBgWriter; + + /* ---------- * PgStat_Msg Union over all possible messages. 
* ---------- */ *************** *** 227,232 **** --- 247,253 ---- PgStat_MsgAutovacStart msg_autovacuum; PgStat_MsgVacuum msg_vacuum; PgStat_MsgAnalyze msg_analyze; + PgStat_MsgBgWriter msg_bgwriter; } PgStat_Msg; *************** *** 292,297 **** --- 313,335 ---- } PgStat_StatTabEntry; + /* + * Global statistics kept in the stats collector + */ + typedef struct PgStat_GlobalStats + { + PgStat_Counter timed_checkpoints; + PgStat_Counter requested_checkpoints; + PgStat_Counter buf_written_checkpoints; + PgStat_Counter buf_written_lru; + PgStat_Counter buf_written_all; + PgStat_Counter maxwritten_lru; + PgStat_Counter maxwritten_all; + PgStat_Counter buf_written_backend; + PgStat_Counter buf_alloc; + } PgStat_GlobalStats; + + /* ---------- * Shared-memory data structures * ---------- *************** *** 469,474 **** --- 507,513 ---- extern void pgstat_count_xact_commit(void); extern void pgstat_count_xact_rollback(void); + extern void pgstat_send_bgwriter(void); /* ---------- * Support functions for the SQL-callable functions to *************** *** 479,483 **** --- 518,523 ---- extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); extern PgBackendStatus *pgstat_fetch_stat_beentry(int beid); extern int pgstat_fetch_stat_numbackends(void); + extern PgStat_GlobalStats *pgstat_fetch_global(void); #endif /* PGSTAT_H */ Index: src/include/catalog/pg_proc.h =================================================================== *** src/include/catalog/pg_proc.h (revision 1) --- src/include/catalog/pg_proc.h (working copy) *************** *** 2858,2865 **** DESCR("current user privilege on relation by rel name"); DATA(insert OID = 1927 ( has_table_privilege PGNSP PGUID 12 f f t f s 2 16 "26 25" _null_ _null_ _null_ has_table_privilege_id - _null_ )); DESCR("current user privilege on relation by rel oid"); - - DATA(insert OID = 1928 ( pg_stat_get_numscans PGNSP PGUID 12 f f t f s 1 20 "26" _null_ _null_ _null_ pg_stat_get_numscans - _null_ )); DESCR("Statistics: 
Number of scans done for table/index"); DATA(insert OID = 1929 ( pg_stat_get_tuples_returned PGNSP PGUID 12 f f t f s 1 20 "26" _null_ _null_ _null_ pg_stat_get_tuples_returned - _null_ )); --- 2858,2863 ---- *************** *** 2876,2881 **** --- 2874,2899 ---- DESCR("Statistics: Number of blocks fetched"); DATA(insert OID = 1935 ( pg_stat_get_blocks_hit PGNSP PGUID 12 f f t f s 1 20 "26" _null_ _null_ _null_ pg_stat_get_blocks_hit - _null_ )); DESCR("Statistics: Number of blocks found in cache"); + + DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints - _null_ )); + DESCR("Statistics: Number of timed checkpoints started by the bgwriter"); + DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints - _null_ )); + DESCR("Statistics: Number of backend requested checkpoints started by the bgwriter"); + DATA(insert OID = 2771 ( pg_stat_get_bgwriter_buf_written_checkpoints PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_buf_written_checkpoints - _null_ )); + DESCR("Statistics: Number of buffers written by the bgwriter during checkpoints"); + DATA(insert OID = 2772 ( pg_stat_get_bgwriter_buf_written_lru PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_buf_written_lru - _null_ )); + DESCR("Statistics: Number of buffers written by the bgwriter during LRU scans"); + DATA(insert OID = 2773 ( pg_stat_get_bgwriter_buf_written_all PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_buf_written_all - _null_ )); + DESCR("Statistics: Number of buffers written by the bgwriter during all-buffer scans"); + DATA(insert OID = 2774 ( pg_stat_get_bgwriter_maxwritten_lru PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_maxwritten_lru - _null_ )); + DESCR("Statistics: Number of 
times the bgwriter stopped processing when it had written too many buffers during LRU scans"); + DATA(insert OID = 2775 ( pg_stat_get_bgwriter_maxwritten_all PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_maxwritten_all - _null_ )); + DESCR("Statistics: Number of times the bgwriter stopped processing when it had written too many buffers during all-buffer scans"); + DATA(insert OID = 2776 ( pg_stat_get_bgwriter_buf_written_backend PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_buf_written_backend - _null_ )); + DESCR("Statistics: Number of buffers written by client backends"); + DATA(insert OID = 2777 ( pg_stat_get_bgwriter_buf_alloc PGNSP PGUID 12 f f t f s 0 20 "" _null_ _null_ _null_ pg_stat_get_bgwriter_buf_alloc - _null_ )); + DESCR("Statistics: Number of buffers allocated by the shared buffer cache"); + DATA(insert OID = 2781 ( pg_stat_get_last_vacuum_time PGNSP PGUID 12 f f t f s 1 1184 "26" _null_ _null_ _null_ pg_stat_get_last_vacuum_time - _null_)); DESCR("Statistics: Last manual vacuum time for a table"); DATA(insert OID = 2782 ( pg_stat_get_last_autovacuum_time PGNSP PGUID 12 f f t f s 1 1184 "26" _null_ _null_ _null_ pg_stat_get_last_autovacuum_time - _null_)); Index: src/backend/utils/adt/pgstatfuncs.c =================================================================== *** src/backend/utils/adt/pgstatfuncs.c (revision 1) --- src/backend/utils/adt/pgstatfuncs.c (working copy) *************** *** 35,40 **** --- 35,50 ---- extern Datum pg_stat_get_last_analyze_time(PG_FUNCTION_ARGS); extern Datum pg_stat_get_last_autoanalyze_time(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_buf_written_lru(PG_FUNCTION_ARGS); + extern Datum 
pg_stat_get_bgwriter_buf_written_all(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_maxwritten_lru(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_maxwritten_all(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_buf_written_backend(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_bgwriter_buf_alloc(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_backend_idset(PG_FUNCTION_ARGS); extern Datum pg_backend_pid(PG_FUNCTION_ARGS); extern Datum pg_stat_reset(PG_FUNCTION_ARGS); *************** *** 54,59 **** --- 64,71 ---- extern Datum pg_stat_get_db_blocks_fetched(PG_FUNCTION_ARGS); extern Datum pg_stat_get_db_blocks_hit(PG_FUNCTION_ARGS); + /* globals defined in bgwriter.c */ + extern PgStat_MsgBgWriter bgwriterStats; Datum pg_stat_get_numscans(PG_FUNCTION_ARGS) *************** *** 605,610 **** --- 617,676 ---- PG_RETURN_INT64(result); } + Datum + pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints); + } + + Datum + pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->requested_checkpoints); + } + + Datum + pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->buf_written_checkpoints); + } + + Datum + pg_stat_get_bgwriter_buf_written_lru(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->buf_written_lru); + } + + Datum + pg_stat_get_bgwriter_buf_written_all(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->buf_written_all); + } + + Datum + pg_stat_get_bgwriter_maxwritten_lru(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_lru); + } + + Datum + pg_stat_get_bgwriter_maxwritten_all(PG_FUNCTION_ARGS) + { + + PG_RETURN_INT64(pgstat_fetch_global()->maxwritten_all); + } + + Datum + pg_stat_get_bgwriter_buf_written_backend(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->buf_written_backend); + } + + Datum + 
pg_stat_get_bgwriter_buf_alloc(PG_FUNCTION_ARGS) + { + PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc); + } Datum pg_stat_get_db_blocks_hit(PG_FUNCTION_ARGS) Index: src/backend/postmaster/postmaster.c =================================================================== *** src/backend/postmaster/postmaster.c (revision 1) --- src/backend/postmaster/postmaster.c (working copy) *************** *** 2574,2580 **** /* in parent, successful fork */ ereport(DEBUG2, ! (errmsg_internal("forked new backend, pid=%d socket=%d", (int) pid, port->sock))); /* --- 2574,2580 ---- /* in parent, successful fork */ ereport(DEBUG2, ! (errmsg_internal("forked new backend (PID %d) socket=%d", (int) pid, port->sock))); /* Index: src/backend/postmaster/pgstat.c =================================================================== *** src/backend/postmaster/pgstat.c (revision 1) --- src/backend/postmaster/pgstat.c (working copy) *************** *** 136,141 **** --- 136,151 ---- static PgBackendStatus *localBackendStatusTable = NULL; static int localNumBackends = 0; + /* + * BgWriter global statistics counters + */ + extern PgStat_MsgBgWriter BgWriterStats; + + /* + * Cluster wide statistics, kept in the stats process + */ + static PgStat_GlobalStats globalStats; + static volatile bool need_exit = false; static volatile bool need_statwrite = false; *************** *** 171,176 **** --- 181,187 ---- static void pgstat_recv_autovac(PgStat_MsgAutovacStart *msg, int len); static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len); static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); + static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); /* ------------------------------------------------------------ *************** *** 1284,1289 **** --- 1295,1316 ---- return localNumBackends; } + /* + * --------- + * pgstat_fetch_global() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the global statistics struct. 
+ * --------- + */ + PgStat_GlobalStats * + pgstat_fetch_global(void) + { + backend_read_statsfile(); + + return &globalStats; + } + /* ------------------------------------------------------------ * Functions for management of the shared-memory PgBackendStatus array *************** *** 1615,1620 **** --- 1642,1684 ---- #endif } + /* ---------- + * pgstat_send_bgwriter() - + * + * Send bgwriter statistics + */ + void + pgstat_send_bgwriter(void) + { + /* + * This function can be called even if nothing at all happened. + * In this case, avoid sending a completely empty message to + * the stats collector. + */ + if (BgWriterStats.m_timed_checkpoints == 0 && + BgWriterStats.m_requested_checkpoints == 0 && + BgWriterStats.m_buf_written_checkpoints == 0 && + BgWriterStats.m_buf_written_lru == 0 && + BgWriterStats.m_buf_written_all == 0 && + BgWriterStats.m_maxwritten_lru == 0 && + BgWriterStats.m_maxwritten_all == 0 && + BgWriterStats.m_buf_written_backend == 0 && + BgWriterStats.m_buf_alloc == 0) + return; + + /* + * Prepare and send off the message + */ + pgstat_setheader(&BgWriterStats.m_hdr, PGSTAT_MTYPE_BGWRITER); + pgstat_send(&BgWriterStats, sizeof(BgWriterStats)); + + /* + * Clear out the bgwriter statistics buffer, so it can be + * re-used. + */ + memset(&BgWriterStats, 0, sizeof(BgWriterStats)); + } + /* ---------- * PgstatCollectorMain() - *************** *** 1861,1866 **** --- 1925,1934 ---- pgstat_recv_analyze((PgStat_MsgAnalyze *) &msg, len); break; + case PGSTAT_MTYPE_BGWRITER: + pgstat_recv_bgwriter((PgStat_MsgBgWriter *) &msg, len); + break; + default: break; } *************** *** 1995,2000 **** --- 2063,2073 ---- fwrite(&format_id, sizeof(format_id), 1, fpout); /* + * Write global stats + */ + fwrite(&globalStats, sizeof(globalStats), 1, fpout); + + /* * Walk through the database table. 
*/ hash_seq_init(&hstat, pgStatDBHash); *************** *** 2111,2116 **** --- 2184,2195 ---- HASH_ELEM | HASH_FUNCTION | mcxt_flags); /* + * Clear out global statistics in case they can't be loaded from an existing + * statsfile + */ + memset(&globalStats, 0, sizeof(globalStats)); + + /* * Try to open the status file. If it doesn't exist, the backends simply * return zero for anything and the collector simply starts from scratch * with empty counters. *************** *** 2130,2135 **** --- 2209,2224 ---- } /* + * Read global stats + */ + if (fread(&globalStats, 1, sizeof(globalStats), fpin) != sizeof(globalStats)) + { + ereport(pgStatRunningInCollector ? LOG : WARNING, + (errmsg("corrupted pgstat.stat file"))); + goto done; + } + + /* * We found an existing collector stats file. Read it and put all the * hashtable entries into place. */ *************** *** 2595,2597 **** --- 2684,2707 ---- tabentry->n_dead_tuples = msg->m_dead_tuples; tabentry->last_anl_tuples = msg->m_live_tuples + msg->m_dead_tuples; } + + + /* ---------- + * pgstat_recv_bgwriter() - + * + * Process a BGWRITER message. 
+ * ---------- + */ + static void + pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) + { + globalStats.timed_checkpoints += msg->m_timed_checkpoints; + globalStats.requested_checkpoints += msg->m_requested_checkpoints; + globalStats.buf_written_checkpoints += msg->m_buf_written_checkpoints; + globalStats.buf_written_lru += msg->m_buf_written_lru; + globalStats.buf_written_all += msg->m_buf_written_all; + globalStats.maxwritten_lru += msg->m_maxwritten_lru; + globalStats.maxwritten_all += msg->m_maxwritten_all; + globalStats.buf_written_backend += msg->m_buf_written_backend; + globalStats.buf_alloc += msg->m_buf_alloc; + } Index: src/backend/postmaster/bgwriter.c =================================================================== *** src/backend/postmaster/bgwriter.c (revision 1) --- src/backend/postmaster/bgwriter.c (working copy) *************** *** 50,55 **** --- 50,56 ---- #include "access/xlog_internal.h" #include "libpq/pqsignal.h" #include "miscadmin.h" + #include "pgstat.h" #include "postmaster/bgwriter.h" #include "storage/fd.h" #include "storage/freespace.h" *************** *** 125,130 **** --- 126,138 ---- static BgWriterShmemStruct *BgWriterShmem; /* + * BgWriter statistic counters, as sent to the stats collector + * Stored directly in a stats message structure so it can be sent + * without needing to copy things around. + */ + PgStat_MsgBgWriter BgWriterStats; + + /* * GUC parameters */ int BgWriterDelay = 200; *************** *** 141,147 **** /* * Private state */ ! static bool am_bg_writer = false; static bool ckpt_active = false; --- 149,155 ---- /* * Private state */ ! bool am_bg_writer = false; static bool ckpt_active = false; *************** *** 243,248 **** --- 251,261 ---- MemoryContextSwitchTo(bgwriter_context); /* + * Initialize statistics counters to zero + */ + memset(&BgWriterStats, 0, sizeof(BgWriterStats)); + + /* * If an exception is encountered, processing resumes here. * * See notes in postgres.c about the design of this coding. 
*************** *** 354,359 **** --- 367,373 ---- checkpoint_requested = false; do_checkpoint = true; force_checkpoint = true; + BgWriterStats.m_requested_checkpoints++; } if (shutdown_requested) { *************** *** 376,382 **** --- 390,401 ---- now = time(NULL); elapsed_secs = now - last_checkpoint_time; if (elapsed_secs >= CheckPointTimeout) + { do_checkpoint = true; + ereport(LOG, (errmsg("checkpoint required (timeout passed)"))); + if (!force_checkpoint) + BgWriterStats.m_timed_checkpoints++; + } /* * Do a checkpoint if requested, otherwise do one cycle of *************** *** 474,479 **** --- 493,503 ---- } /* + * Send off activity statistics to the stats collector + */ + pgstat_send_bgwriter(); + + /* * Nap for the configured time, or sleep for 10 seconds if there is no * bgwriter activity configured. * *************** *** 789,791 **** --- 813,816 ---- END_CRIT_SECTION(); } + Index: src/backend/postmaster/autovacuum.c =================================================================== *** src/backend/postmaster/autovacuum.c (revision 1) --- src/backend/postmaster/autovacuum.c (working copy) *************** *** 431,436 **** --- 431,438 ---- * And do an appropriate amount of work */ do_autovacuum(db->entry); + ereport(DEBUG2, + (errmsg("autovacuum: processing database \"%s\" complete", db->name))); } /* One iteration done, go away */ Index: src/backend/access/transam/xlog.c =================================================================== *** src/backend/access/transam/xlog.c (revision 1) --- src/backend/access/transam/xlog.c (working copy) *************** *** 1577,1586 **** if (new_highbits != old_highbits || new_segno >= old_segno + (uint32) CheckPointSegments) { ! #ifdef WAL_DEBUG ! if (XLOG_DEBUG) ! elog(LOG, "time for a checkpoint, signaling bgwriter"); ! #endif RequestCheckpoint(false, true); } } --- 1577,1584 ---- if (new_highbits != old_highbits || new_segno >= old_segno + (uint32) CheckPointSegments) { ! ereport(LOG,(errmsg( ! 
"checkpoint required (wrote checkpoint_segments)"))); RequestCheckpoint(false, true); } } *************** *** 1940,1945 **** --- 1938,1945 ---- (errcode_for_file_access(), errmsg("could not create file \"%s\": %m", tmppath))); + ereport(LOG, (errmsg("WAL creating and filling new file on disk"))); + /* * Zero-fill the file. We have to do this the hard way to ensure that all * the file space has really been allocated --- on platforms that allow *************** *** 2647,2653 **** true, &max_advance, true)) { ! ereport(DEBUG2, (errmsg("recycled transaction log file \"%s\"", xlde->d_name))); (*nsegsrecycled)++; --- 2647,2653 ---- true, &max_advance, true)) { ! ereport(DEBUG3, (errmsg("recycled transaction log file \"%s\"", xlde->d_name))); (*nsegsrecycled)++; *************** *** 5519,5524 **** --- 5519,5525 ---- LWLockRelease(CheckpointStartLock); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); + ereport(LOG,(errmsg("checkpoint request ignored (no changes)"))); return; } } *************** *** 6183,6188 **** --- 6184,6190 ---- * have different checkpoint positions and hence different history * file names, even if nothing happened in between. */ + ereport(LOG, (errmsg("checkpoint required (starting backup)"))); RequestCheckpoint(true, false); /* Index: src/backend/storage/buffer/bufmgr.c =================================================================== *** src/backend/storage/buffer/bufmgr.c (revision 1) --- src/backend/storage/buffer/bufmgr.c (working copy) *************** *** 35,40 **** --- 35,41 ---- #include "postgres.h" #include + #include #include #include "miscadmin.h" *************** *** 72,78 **** * bufmgr */ long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. 
*/ - /* local state for StartBufferIO and related functions */ static volatile BufferDesc *InProgressBuf = NULL; static bool IsForInput; --- 73,78 ---- *************** *** 80,85 **** --- 80,89 ---- /* local state for LockBufferForCleanup */ static volatile BufferDesc *PinCountWaitBuf = NULL; + /* statistics for bgwriter. The contents of this variable only make + * sense in the bgwriter */ + extern PgStat_MsgBgWriter BgWriterStats; + static bool PinBuffer(volatile BufferDesc *buf); static void PinBuffer_Locked(volatile BufferDesc *buf); *************** *** 940,956 **** * * This is called at checkpoint time to write out all dirty shared buffers. */ ! void BufferSync(void) { int buf_id; int num_to_scan; int absorb_counter; /* * Find out where to start the circular scan. */ ! buf_id = StrategySyncStart(); /* Make sure we can handle the pin inside SyncOneBuffer */ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); --- 944,965 ---- * * This is called at checkpoint time to write out all dirty shared buffers. */ ! int BufferSync(void) { int buf_id; int num_to_scan; int absorb_counter; + int dirty_buffers; + int recent_alloc; + int num_backend_writes; /* * Find out where to start the circular scan. */ ! buf_id = StrategySyncStart(&recent_alloc,&num_backend_writes); ! BgWriterStats.m_buf_alloc+=recent_alloc; !
BgWriterStats.m_buf_written_backend+=num_backend_writes; /* Make sure we can handle the pin inside SyncOneBuffer */ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); *************** *** 960,969 **** --- 969,981 ---- */ num_to_scan = NBuffers; absorb_counter = WRITES_PER_ABSORB; + dirty_buffers = 0; while (num_to_scan-- > 0) { if (SyncOneBuffer(buf_id, false)) { + dirty_buffers++; + /* * If in bgwriter, absorb pending fsync requests after each * WRITES_PER_ABSORB write operations, to prevent overflow of the *************** *** 979,984 **** --- 991,999 ---- if (++buf_id >= NBuffers) buf_id = 0; } + + BgWriterStats.m_buf_written_checkpoints+=dirty_buffers; + return dirty_buffers; } /* *************** *** 993,998 **** --- 1008,1015 ---- int buf_id2; int num_to_scan; int num_written; + int recent_alloc; + int num_backend_writes; /* Make sure we can handle the pin inside SyncOneBuffer */ ResourceOwnerEnlargeBuffers(CurrentResourceOwner); *************** *** 1027,1037 **** --- 1044,1062 ---- if (SyncOneBuffer(buf_id1, false)) { if (++num_written >= bgwriter_all_maxpages) + { + BgWriterStats.m_maxwritten_all++; break; + } } } + BgWriterStats.m_buf_written_all += num_written; } + buf_id2 = StrategySyncStart(&recent_alloc,&num_backend_writes); + BgWriterStats.m_buf_alloc+=recent_alloc; + BgWriterStats.m_buf_written_backend+=num_backend_writes; + /* * This loop considers only unpinned buffers close to the clock sweep * point. 
*************** *** 1041,1058 **** num_to_scan = (int) ((NBuffers * bgwriter_lru_percent + 99) / 100); num_written = 0; - buf_id2 = StrategySyncStart(); - while (num_to_scan-- > 0) { if (SyncOneBuffer(buf_id2, true)) { if (++num_written >= bgwriter_lru_maxpages) break; } if (++buf_id2 >= NBuffers) buf_id2 = 0; } } } --- 1066,1085 ---- num_to_scan = (int) ((NBuffers * bgwriter_lru_percent + 99) / 100); num_written = 0; while (num_to_scan-- > 0) { if (SyncOneBuffer(buf_id2, true)) { if (++num_written >= bgwriter_lru_maxpages) + { + BgWriterStats.m_maxwritten_lru++; break; + } } if (++buf_id2 >= NBuffers) buf_id2 = 0; } + BgWriterStats.m_buf_written_lru += num_written; } } *************** *** 1272,1279 **** void FlushBufferPool(void) { ! BufferSync(); smgrsync(); } --- 1299,1326 ---- void FlushBufferPool(void) { ! int dirty; ! struct timeval start_t, sync_t, end_t; ! long u_sec_for_buffers, u_sec_for_sync; ! ! gettimeofday(&start_t, NULL); ! ereport(LOG, (errmsg("checkpoint flushing buffer pool"))); ! dirty=BufferSync(); ! gettimeofday(&sync_t, NULL); ! 
ereport(LOG, (errmsg("checkpoint database fsync starting"))); smgrsync(); + gettimeofday(&end_t, NULL); + ereport(LOG, (errmsg("checkpoint database fsync complete"))); + + u_sec_for_buffers = 1000000 * (long) (sync_t.tv_sec - start_t.tv_sec) + + (long) (sync_t.tv_usec - start_t.tv_usec); + u_sec_for_sync = 1000000 * (long) (end_t.tv_sec - sync_t.tv_sec) + + (long) (end_t.tv_usec - sync_t.tv_usec); + + ereport(LOG,(errmsg( + "checkpoint buffers dirty=%.1f MB (%.1f%%) write=%.1f ms sync=%.1f ms", + (float) (BLCKSZ * dirty) / (1024*1024), (float) 100 * dirty / NBuffers, + (float) u_sec_for_buffers / 1000, (float) u_sec_for_sync / 1000))); } *************** *** 1400,1405 **** --- 1447,1453 ---- false); BufferFlushCount++; + StrategyReportWrite(); /* * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and Index: src/backend/storage/buffer/freelist.c =================================================================== *** src/backend/storage/buffer/freelist.c (revision 1) --- src/backend/storage/buffer/freelist.c (working copy) *************** *** 29,35 **** int firstFreeBuffer; /* Head of list of unused buffers */ int lastFreeBuffer; /* Tail of list of unused buffers */ ! /* * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is, * when the list is empty) --- 29,36 ---- int firstFreeBuffer; /* Head of list of unused buffers */ int lastFreeBuffer; /* Tail of list of unused buffers */ ! int numGetBuffer; /* Calls to BufferAlloc since last reset */ ! 
int numClientWrites; /* Buffers written by backends since last reset */ /* * NOTE: lastFreeBuffer is undefined when firstFreeBuffer is -1 (that is, * when the list is empty) *************** *** 42,47 **** --- 43,50 ---- /* Backend-local state about whether currently vacuuming */ bool strategy_hint_vacuum = false; + /* Used to determine which type of process we're running as */ + extern bool am_bg_writer; /* * StrategyGetBuffer *************** *** 62,67 **** --- 65,71 ---- int trycounter; LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + StrategyControl->numGetBuffer++; /* * Try to get a buffer from the freelist. Note that the freeNext fields *************** *** 176,182 **** * BufferSync() will proceed circularly around the buffer array from there. */ int ! StrategySyncStart(void) { int result; --- 180,186 ---- * BufferSync() will proceed circularly around the buffer array from there. */ int ! StrategySyncStart(int *num_buf_alloc,int *num_backend_writes) { int result; *************** *** 186,196 **** --- 190,224 ---- */ LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); result = StrategyControl->nextVictimBuffer; + + /* Return and reset statistics for activity since last call */ + if (num_buf_alloc!=NULL) *num_buf_alloc=StrategyControl->numGetBuffer; + if (num_backend_writes!=NULL) *num_backend_writes=StrategyControl->numClientWrites; + StrategyControl->numGetBuffer = 0; + StrategyControl->numClientWrites = 0; + LWLockRelease(BufFreelistLock); return result; } /* + * StrategyReportWrite -- After a buffer is written out, update + * local statistics based on who did the writing + */ + void + StrategyReportWrite(void) + { + /* The background writer already keeps track of the buffers it writes, + so only count backend writes here */ + if (!am_bg_writer) + { + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + StrategyControl->numClientWrites++; + LWLockRelease(BufFreelistLock); + } + } + + /* * StrategyHintVacuum -- tell us whether VACUUM is active */ void *************** *** 270,275 ****
--- 298,305 ---- /* Initialize the clock sweep pointer */ StrategyControl->nextVictimBuffer = 0; + StrategyControl->numGetBuffer = 0; + StrategyControl->numClientWrites = 0; } else Assert(!init); Index: src/backend/catalog/system_views.sql =================================================================== *** src/backend/catalog/system_views.sql (revision 1) --- src/backend/catalog/system_views.sql (working copy) *************** *** 356,358 **** --- 356,370 ---- pg_stat_get_db_blocks_hit(D.oid) AS blks_read, pg_stat_get_db_blocks_hit(D.oid) AS blks_hit FROM pg_database D; + + CREATE VIEW pg_stat_bgwriter AS + SELECT + pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints_timed, + pg_stat_get_bgwriter_requested_checkpoints() AS checkpoints_req, + pg_stat_get_bgwriter_buf_written_checkpoints() AS buffers_checkpoint, + pg_stat_get_bgwriter_buf_written_lru() AS buffers_lru, + pg_stat_get_bgwriter_buf_written_all() AS buffers_all, + pg_stat_get_bgwriter_maxwritten_lru() AS maxwritten_lru, + pg_stat_get_bgwriter_maxwritten_all() AS maxwritten_all, + pg_stat_get_bgwriter_buf_written_backend() AS buffers_backend, + pg_stat_get_bgwriter_buf_alloc() AS buffers_alloc;