Translate into your own language

Monday, February 16, 2026

PostgreSQL - Table Bloat

WITH constants AS (
  -- Define size constants in one place for easy maintenance:
  --   bs  = database block (page) size in bytes
  --   hdr = tuple header size in bytes
  --   ma  = maximum alignment boundary
  SELECT
    current_setting('block_size')::numeric AS bs,
    23 AS hdr,
    8 AS ma
),
no_stats AS (
  -- Screen out tables that have one or more columns with no pg_stats entry
  -- (e.g. JSON columns): bloat cannot be estimated for those, so they are
  -- reported separately with can_estimate = FALSE.
  SELECT
    table_schema,
    table_name,
    n_live_tup::numeric AS est_rows,
    pg_table_size(relid)::numeric AS table_size
  FROM information_schema.columns
  JOIN pg_stat_user_tables AS psut
    ON table_schema = psut.schemaname
   AND table_name = psut.relname
  LEFT OUTER JOIN pg_stats
    ON table_schema = pg_stats.schemaname
   AND table_name = pg_stats.tablename
   AND column_name = attname
  WHERE attname IS NULL  -- anti-join: this column has no stats row
    AND table_schema NOT IN ('pg_catalog', 'information_schema')
  GROUP BY
    table_schema,
    table_name,
    relid,
    n_live_tup
),
null_headers AS (
  -- Calculate null-bitmap header sizes, omitting tables that do not have
  -- complete stats (see no_stats) and attributes that are not visible.
  SELECT
    -- one bit per column with nulls, rounded down to whole bytes
    hdr + 1 + (
      SUM(CASE WHEN null_frac <> 0 THEN 1 ELSE 0 END) / 8
    ) AS nullhdr,
    SUM((1 - null_frac) * avg_width) AS datawidth,
    MAX(null_frac) AS maxfracsum,
    schemaname,
    tablename,
    hdr,
    ma,
    bs
  FROM pg_stats
  CROSS JOIN constants
  LEFT OUTER JOIN no_stats
    ON schemaname = no_stats.table_schema
   AND tablename = no_stats.table_name
  WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
    AND no_stats.table_name IS NULL  -- exclude tables with incomplete stats
    AND EXISTS (
      -- keep only tables that still exist in information_schema
      SELECT 1
      FROM information_schema.columns
      WHERE schemaname = columns.table_schema
        AND tablename = columns.table_name
    )
  GROUP BY
    schemaname,
    tablename,
    hdr,
    ma,
    bs
),
data_headers AS (
  -- Estimate header and row size, padded up to the alignment boundary.
  SELECT
    ma,
    bs,
    hdr,
    schemaname,
    tablename,
    (
      datawidth + (
        hdr + ma - (CASE WHEN hdr % ma = 0 THEN ma ELSE hdr % ma END)
      )
    )::numeric AS datahdr,
    (
      maxfracsum * (
        nullhdr + ma - (CASE WHEN nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END)
      )
    ) AS nullhdr2
  FROM null_headers
),
table_estimates AS (
  -- Estimate how large each table should be, based on row and page size.
  SELECT
    schemaname,
    tablename,
    bs,
    reltuples::numeric AS est_rows,
    relpages * bs AS table_bytes,
    CEIL(
      (
        reltuples * (
          datahdr + nullhdr2 + 4 + ma - (
            CASE WHEN datahdr % ma = 0 THEN ma ELSE datahdr % ma END
          )
        ) / (bs - 20)  -- 20 bytes of per-page overhead
      )
    ) * bs AS expected_bytes,
    reltoastrelid
  FROM data_headers
  JOIN pg_class ON tablename = relname
  JOIN pg_namespace
    ON relnamespace = pg_namespace.oid
   AND schemaname = nspname
  WHERE pg_class.relkind = 'r'  -- ordinary tables only
),
estimates_with_toast AS (
  -- Add in estimated TOAST table sizes. Estimate based on 4 toast tuples
  -- per page because we don't have anything better to go on.
  SELECT
    schemaname,
    tablename,
    TRUE AS can_estimate,
    est_rows,
    table_bytes + (COALESCE(toast.relpages, 0) * bs) AS table_bytes,
    expected_bytes + (
      CEIL(COALESCE(toast.reltuples, 0) / 4) * bs
    ) AS expected_bytes
  FROM table_estimates
  LEFT OUTER JOIN pg_class AS toast
    ON table_estimates.reltoastrelid = toast.oid
   AND toast.relkind = 't'
),
table_estimates_plus AS (
  -- Add metadata and reusable calculations, and append the tables whose
  -- bloat we cannot estimate (from no_stats, with can_estimate = FALSE).
  SELECT
    current_database() AS databasename,
    schemaname,
    tablename,
    can_estimate,
    est_rows,
    CASE WHEN table_bytes > 0
         THEN table_bytes::numeric
         ELSE NULL::numeric END AS table_bytes,
    CASE WHEN expected_bytes > 0
         THEN expected_bytes::numeric
         ELSE NULL::numeric END AS expected_bytes,
    CASE WHEN expected_bytes > 0
          AND table_bytes > 0
          AND expected_bytes <= table_bytes
         THEN (table_bytes - expected_bytes)::numeric
         ELSE 0::numeric END AS bloat_bytes
  FROM estimates_with_toast
  UNION ALL
  SELECT
    current_database() AS databasename,
    table_schema,
    table_name,
    FALSE,
    est_rows,
    table_size,
    NULL::numeric,
    NULL::numeric
  FROM no_stats
),
bloat_data AS (
  -- Do final math calculations and MB formatting.
  SELECT
    current_database() AS databasename,
    schemaname,
    tablename,
    can_estimate,
    table_bytes,
    ROUND(table_bytes / (1024 ^ 2)::numeric, 3) AS table_mb,
    expected_bytes,
    ROUND(expected_bytes / (1024 ^ 2)::numeric, 3) AS expected_mb,
    -- NULLIF guards against division by zero for zero-byte tables
    ROUND(bloat_bytes * 100 / NULLIF(table_bytes, 0)) AS pct_bloat,
    ROUND(bloat_bytes / (1024::numeric ^ 2), 2) AS mb_bloat,
    est_rows
  FROM table_estimates_plus
)
-- Filter output for bloated tables. The WHERE clause below currently shows
-- ALL tables (pct_bloat >= 0 AND mb_bloat >= 0); tighten it to taste.
-- For example, to show tables that are either 50% bloated and over 20 MB,
-- or 25% bloated and over 1 GB:
--   WHERE (pct_bloat >= 50 AND mb_bloat >= 20)
--      OR (pct_bloat >= 25 AND mb_bloat >= 1024)
SELECT
  databasename,
  schemaname,
  tablename,
  can_estimate,
  est_rows,
  pct_bloat,
  mb_bloat,
  table_mb
FROM bloat_data
WHERE pct_bloat >= 0
  AND mb_bloat >= 0
ORDER BY mb_bloat DESC
LIMIT 20;

No comments:

Post a Comment