From 3d351d916b20534f973eda760cde17d96545d4c4 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 30 Aug 2020 12:21:51 -0400 Subject: [PATCH] Redefine pg_class.reltuples to be -1 before the first VACUUM or ANALYZE. Historically, we've considered the state with relpages and reltuples both zero as indicating that we do not know the table's tuple density. This is problematic because it's impossible to distinguish "never yet vacuumed" from "vacuumed and seen to be empty". In particular, a user cannot use VACUUM or ANALYZE to override the planner's normal heuristic that an empty table should not be believed to be empty because it is probably about to get populated. That heuristic is a good safety measure, so I don't care to abandon it, but there should be a way to override it if the table is indeed intended to stay empty. Hence, represent the initial state of ignorance by setting reltuples to -1 (relpages is still set to zero), and apply the minimum-ten-pages heuristic only when reltuples is still -1. If the table is empty, VACUUM or ANALYZE (but not CREATE INDEX) will override that to reltuples = relpages = 0, and then we'll plan on that basis. This requires a bunch of fiddly little changes, but we can get rid of some ugly kluges that were formerly needed to maintain the old definition. One notable point is that FDWs' GetForeignRelSize methods will see baserel->tuples = -1 when no ANALYZE has been done on the foreign table. That seems like a net improvement, since those methods were formerly also in the dark about what baserel->tuples = 0 really meant. Still, it is an API change. I bumped catversion because code predating this change would get confused by seeing reltuples = -1. Discussion: https://postgr.es/m/F02298E0-6EF4-49A1-BCB6-C484794D9ACC@thebuild.com --- contrib/file_fdw/file_fdw.c | 2 +- contrib/pgstattuple/pgstatapprox.c | 3 +++ contrib/postgres_fdw/postgres_fdw.c | 7 +++-- doc/src/sgml/catalogs.sgml | 4 +++ doc/src/sgml/fdwhandler.sgml | 3 ++- src/backend/access/gin/ginvacuum.c | 2 +- src/backend/access/heap/vacuumlazy.c | 38 +++++++++++---------------- src/backend/access/nbtree/nbtree.c | 1 + src/backend/access/table/tableam.c | 22 +++++++--------- src/backend/catalog/heap.c | 4 +-- src/backend/catalog/index.c | 9 +++++++ src/backend/commands/vacuum.c | 14 +++++----- src/backend/optimizer/path/allpaths.c | 6 ++++- src/backend/optimizer/util/plancat.c | 11 +++----- src/backend/postmaster/autovacuum.c | 4 +++ src/backend/rewrite/rewriteDefine.c | 2 +- src/backend/utils/cache/relcache.c | 4 +-- src/include/access/genam.h | 4 +-- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_class.h | 4 +-- 20 files changed, 77 insertions(+), 69 deletions(-) diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c index fbcf7ca9c9..072a6dc1c1 100644 --- a/contrib/file_fdw/file_fdw.c +++ b/contrib/file_fdw/file_fdw.c @@ -996,7 +996,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel, /* * Estimate the number of tuples in the file. */ - if (baserel->pages > 0) + if (baserel->tuples >= 0 && baserel->pages > 0) { /* * We have # of pages and # of tuples from pg_class (that is, from a diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 3a99333d44..23306e11a7 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -195,6 +195,9 @@ statapprox_heap(Relation rel, output_type *stat) stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned, stat->tuple_count); + /* It's not clear if we could get -1 here, but be safe. */ + stat->tuple_count = Max(stat->tuple_count, 0); + /* * Calculate percentages if the relation has one or more pages. */ diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 9fc53cad68..a31abce7c9 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -692,15 +692,14 @@ postgresGetForeignRelSize(PlannerInfo *root, else { /* - * If the foreign table has never been ANALYZEd, it will have relpages - * and reltuples equal to zero, which most likely has nothing to do - * with reality. We can't do a whole lot about that if we're not + * If the foreign table has never been ANALYZEd, it will have + * reltuples < 0, meaning "unknown". We can't do much if we're not * allowed to consult the remote server, but we can use a hack similar * to plancat.c's treatment of empty relations: use a minimum size * estimate of 10 pages, and divide by the column-datatype-based width * estimate to get the corresponding number of tuples. */ - if (baserel->pages == 0 && baserel->tuples == 0) + if (baserel->tuples < 0) { baserel->pages = 10; baserel->tuples = diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 9fe260ecff..1d1b8ce8fb 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1977,6 +1977,10 @@ SCRAM-SHA-256$<iteration count>:&l the planner. It is updated by VACUUM, ANALYZE, and a few DDL commands such as CREATE INDEX. + If the table has never yet been vacuumed or + analyzed, reltuples + contains -1 indicating that the row count is + unknown. diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml index 74793035d7..72fa127212 100644 --- a/doc/src/sgml/fdwhandler.sgml +++ b/doc/src/sgml/fdwhandler.sgml @@ -130,7 +130,8 @@ GetForeignRelSize(PlannerInfo *root, (The initial value is from pg_class.reltuples which represents the total row count seen by the - last ANALYZE.) + last ANALYZE; it will be -1 if + no ANALYZE has been done on this foreign table.) diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c index 9cd6638df6..0935a6d9e5 100644 --- a/src/backend/access/gin/ginvacuum.c +++ b/src/backend/access/gin/ginvacuum.c @@ -727,7 +727,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) * entries. This is bogus if the index is partial, but it's real hard to * tell how many distinct heap entries are referenced by a GIN index. */ - stats->num_index_tuples = info->num_heap_tuples; + stats->num_index_tuples = Max(info->num_heap_tuples, 0); stats->estimated_count = info->estimated_count; /* diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index a0da444af0..53b1a95254 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -208,7 +208,8 @@ typedef struct LVShared * live tuples in the index vacuum case or the new live tuples in the * index cleanup case. * - * estimated_count is true if reltuples is an estimated value. + * estimated_count is true if reltuples is an estimated value. (Note that + * reltuples could be -1 in this case, indicating we have no idea.) */ double reltuples; bool estimated_count; @@ -567,31 +568,19 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, /* * Update statistics in pg_class. * - * A corner case here is that if we scanned no pages at all because every - * page is all-visible, we should not update relpages/reltuples, because - * we have no new information to contribute. In particular this keeps us - * from replacing relpages=reltuples=0 (which means "unknown tuple - * density") with nonzero relpages and reltuples=0 (which means "zero - * tuple density") unless there's some actual evidence for the latter. + * In principle new_live_tuples could be -1 indicating that we (still) + * don't know the tuple count. In practice that probably can't happen, + * since we'd surely have scanned some pages if the table is new and + * nonempty. * - * It's important that we use tupcount_pages and not scanned_pages for the - * check described above; scanned_pages counts pages where we could not - * get cleanup lock, and which were processed only for frozenxid purposes. - * - * We do update relallvisible even in the corner case, since if the table - * is all-visible we'd definitely like to know that. But clamp the value - * to be not more than what we're setting relpages to. + * For safety, clamp relallvisible to be not more than what we're setting + * relpages to. * * Also, don't change relfrozenxid/relminmxid if we skipped any pages, * since then we don't know for certain that all tuples have a newer xmin. */ new_rel_pages = vacrelstats->rel_pages; new_live_tuples = vacrelstats->new_live_tuples; - if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0) - { - new_rel_pages = vacrelstats->old_rel_pages; - new_live_tuples = vacrelstats->old_live_tuples; - } visibilitymap_count(onerel, &new_rel_allvisible, NULL); if (new_rel_allvisible > new_rel_pages) @@ -612,7 +601,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params, /* report results to the stats collector, too */ pgstat_report_vacuum(RelationGetRelid(onerel), onerel->rd_rel->relisshared, - new_live_tuples, + Max(new_live_tuples, 0), vacrelstats->new_dead_tuples); pgstat_progress_end_command(); @@ -1695,9 +1684,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, vacrelstats->tupcount_pages, live_tuples); - /* also compute total number of surviving heap entries */ + /* + * Also compute the total number of surviving heap entries. In the + * (unlikely) scenario that new_live_tuples is -1, take it as zero. + */ vacrelstats->new_rel_tuples = - vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples; + Max(vacrelstats->new_live_tuples, 0) + vacrelstats->new_dead_tuples; /* * Release any remaining pin on visibility map page. @@ -2434,7 +2426,7 @@ lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats, * dead_tuples, and update running statistics. * * reltuples is the number of heap tuples to be passed to the - * bulkdelete callback. + * bulkdelete callback. It's always assumed to be estimated. */ static void lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats, diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 8fa6ac7296..c822b49a71 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -853,6 +853,7 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info) prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples; if (cleanup_scale_factor <= 0 || + info->num_heap_tuples < 0 || prev_num_heap_tuples <= 0 || (info->num_heap_tuples - prev_num_heap_tuples) / prev_num_heap_tuples >= cleanup_scale_factor) diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c index c638319765..6438c45716 100644 --- a/src/backend/access/table/tableam.c +++ b/src/backend/access/table/tableam.c @@ -701,18 +701,14 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths, * doesn't happen instantaneously, and it won't happen at all for cases * such as temporary tables.) * - * We approximate "never vacuumed" by "has relpages = 0", which means this - * will also fire on genuinely empty relations. Not great, but - * fortunately that's a seldom-seen case in the real world, and it - * shouldn't degrade the quality of the plan too much anyway to err in - * this direction. + * We test "never vacuumed" by seeing whether reltuples < 0. * * If the table has inheritance children, we don't apply this heuristic. * Totally empty parent tables are quite common, so we should be willing * to believe that they are empty. */ if (curpages < 10 && - relpages == 0 && + reltuples < 0 && !rel->rd_rel->relhassubclass) curpages = 10; @@ -727,17 +723,17 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths, } /* estimate number of tuples from previous tuple density */ - if (relpages > 0) + if (reltuples >= 0 && relpages > 0) density = reltuples / (double) relpages; else { /* - * When we have no data because the relation was truncated, estimate - * tuple width from attribute datatypes. We assume here that the - * pages are completely full, which is OK for tables (since they've - * presumably not been VACUUMed yet) but is probably an overestimate - * for indexes. Fortunately get_relation_info() can clamp the - * overestimate to the parent table's size. + * When we have no data because the relation was never yet vacuumed, + * estimate tuple width from attribute datatypes. We assume here that + * the pages are completely full, which is OK for tables but is + * probably an overestimate for indexes. Fortunately + * get_relation_info() can clamp the overestimate to the parent + * table's size. * * Note: this code intentionally disregards alignment considerations, * because (a) that would be gilding the lily considering how crude diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index f2ca686397..abd5bdb866 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -1015,7 +1015,7 @@ AddNewRelationTuple(Relation pg_class_desc, case RELKIND_TOASTVALUE: /* The relation is real, but as yet empty */ new_rel_reltup->relpages = 0; - new_rel_reltup->reltuples = 0; + new_rel_reltup->reltuples = -1; new_rel_reltup->relallvisible = 0; break; case RELKIND_SEQUENCE: @@ -1027,7 +1027,7 @@ AddNewRelationTuple(Relation pg_class_desc, default: /* Views, etc, have no disk storage */ new_rel_reltup->relpages = 0; - new_rel_reltup->reltuples = 0; + new_rel_reltup->reltuples = -1; new_rel_reltup->relallvisible = 0; break; } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 62e487bb4c..d0ec9a4b9c 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -2722,6 +2722,15 @@ index_update_stats(Relation rel, /* Should this be a more comprehensive test? */ Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX); + /* + * As a special hack, if we are dealing with an empty table and the + * existing reltuples is -1, we leave that alone. This ensures that + * creating an index as part of CREATE TABLE doesn't cause the table to + * prematurely look like it's been vacuumed. + */ + if (reltuples == 0 && rd_rel->reltuples < 0) + reltuples = -1; + /* Apply required updates, if any, to copied tuple */ dirty = false; diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 23eb605d4c..308a51d95d 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1128,8 +1128,8 @@ vacuum_set_xid_limits(Relation rel, * live tuples seen; but if we did not, we should not blindly extrapolate * from that number, since VACUUM may have scanned a quite nonrandom * subset of the table. When we have only partial information, we take - * the old value of pg_class.reltuples as a measurement of the - * tuple density in the unscanned pages. + * the old value of pg_class.reltuples/pg_class.relpages as a measurement + * of the tuple density in the unscanned pages. * * Note: scanned_tuples should count only *live* tuples, since * pg_class.reltuples is defined that way. @@ -1152,18 +1152,16 @@ vac_estimate_reltuples(Relation relation, /* * If scanned_pages is zero but total_pages isn't, keep the existing value - * of reltuples. (Note: callers should avoid updating the pg_class - * statistics in this situation, since no new information has been - * provided.) + * of reltuples. (Note: we might be returning -1 in this case.) */ if (scanned_pages == 0) return old_rel_tuples; /* - * If old value of relpages is zero, old density is indeterminate; we - * can't do much except scale up scanned_tuples to match total_pages. + * If old density is unknown, we can't do much except scale up + * scanned_tuples to match total_pages. */ - if (old_rel_pages == 0) + if (old_rel_tuples < 0 || old_rel_pages == 0) return floor((scanned_tuples / scanned_pages) * total_pages + 0.5); /* diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index 0eeff804bc..b399592ff8 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -912,7 +912,11 @@ set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* ... but do not let it set the rows estimate to zero */ rel->rows = clamp_row_est(rel->rows); - /* also, make sure rel->tuples is not insane relative to rel->rows */ + /* + * Also, make sure rel->tuples is not insane relative to rel->rows. + * Notably, this ensures sanity if pg_class.reltuples contains -1 and the + * FDW doesn't do anything to replace that. + */ rel->tuples = Max(rel->tuples, rel->rows); } diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 25545029d7..f9d0d67aa7 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -974,11 +974,6 @@ estimate_rel_size(Relation rel, int32 *attr_widths, /* it has storage, ok to call the smgr */ curpages = RelationGetNumberOfBlocks(rel); - /* coerce values in pg_class to more desirable types */ - relpages = (BlockNumber) rel->rd_rel->relpages; - reltuples = (double) rel->rd_rel->reltuples; - relallvisible = (BlockNumber) rel->rd_rel->relallvisible; - /* report estimated # pages */ *pages = curpages; /* quick exit if rel is clearly empty */ @@ -988,6 +983,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths, *allvisfrac = 0; break; } + /* coerce values in pg_class to more desirable types */ relpages = (BlockNumber) rel->rd_rel->relpages; reltuples = (double) rel->rd_rel->reltuples; @@ -1006,12 +1002,12 @@ estimate_rel_size(Relation rel, int32 *attr_widths, } /* estimate number of tuples from previous tuple density */ - if (relpages > 0) + if (reltuples >= 0 && relpages > 0) density = reltuples / (double) relpages; else { /* - * When we have no data because the relation was truncated, + * If we have no data because the relation was never vacuumed, * estimate tuple width from attribute datatypes. We assume * here that the pages are completely full, which is OK for * tables (since they've presumably not been VACUUMed yet) but @@ -1059,6 +1055,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths, break; case RELKIND_FOREIGN_TABLE: /* Just use whatever's in pg_class */ + /* Note that FDW must cope if reltuples is -1! */ *pages = rel->rd_rel->relpages; *tuples = rel->rd_rel->reltuples; *allvisfrac = 0; diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index c6ec657a93..1b8cd7bacd 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -3080,6 +3080,10 @@ relation_needs_vacanalyze(Oid relid, instuples = tabentry->inserts_since_vacuum; anltuples = tabentry->changes_since_analyze; + /* If the table hasn't yet been vacuumed, take reltuples as zero */ + if (reltuples < 0) + reltuples = 0; + vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples; vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples; anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples; diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c index 9989df1107..8ef0917021 100644 --- a/src/backend/rewrite/rewriteDefine.c +++ b/src/backend/rewrite/rewriteDefine.c @@ -621,7 +621,7 @@ DefineQueryRewrite(const char *rulename, classForm->relam = InvalidOid; classForm->reltablespace = InvalidOid; classForm->relpages = 0; - classForm->reltuples = 0; + classForm->reltuples = -1; classForm->relallvisible = 0; classForm->reltoastrelid = InvalidOid; classForm->relhasindex = false; diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index a2453cf1f4..96ecad02dd 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -1870,7 +1870,7 @@ formrdesc(const char *relationName, Oid relationReltype, relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING; relation->rd_rel->relpages = 0; - relation->rd_rel->reltuples = 0; + relation->rd_rel->reltuples = -1; relation->rd_rel->relallvisible = 0; relation->rd_rel->relkind = RELKIND_RELATION; relation->rd_rel->relnatts = (int16) natts; @@ -3692,7 +3692,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence) if (relation->rd_rel->relkind != RELKIND_SEQUENCE) { classform->relpages = 0; /* it's empty until further notice */ - classform->reltuples = 0; + classform->reltuples = -1; classform->relallvisible = 0; } classform->relfrozenxid = freezeXid; diff --git a/src/include/access/genam.h b/src/include/access/genam.h index 931257bd81..68d90f5141 100644 --- a/src/include/access/genam.h +++ b/src/include/access/genam.h @@ -38,8 +38,8 @@ typedef struct IndexBuildResult * * num_heap_tuples is accurate only when estimated_count is false; * otherwise it's just an estimate (currently, the estimate is the - * prior value of the relation's pg_class.reltuples field). It will - * always just be an estimate during ambulkdelete. + * prior value of the relation's pg_class.reltuples field, so it could + * even be -1). It will always just be an estimate during ambulkdelete. */ typedef struct IndexVacuumInfo { diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 573f1841b7..52ca61f8a8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202008261 +#define CATALOG_VERSION_NO 202008301 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index 78b33b2a7f..679eec3443 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -62,8 +62,8 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat /* # of blocks (not always up-to-date) */ int32 relpages BKI_DEFAULT(0); - /* # of tuples (not always up-to-date) */ - float4 reltuples BKI_DEFAULT(0); + /* # of tuples (not always up-to-date; -1 means "unknown") */ + float4 reltuples BKI_DEFAULT(-1); /* # of all-visible blocks (not always up-to-date) */ int32 relallvisible BKI_DEFAULT(0);