WITH constants AS (
-- define some constants for sizes of things
-- for reference down the query and easy maintenance
SELECT
current_setting('block_size'):: numeric AS bs,
23 AS hdr,
8 AS ma
),
no_stats AS (
-- screen out table who have attributes
-- which dont have stats, such as JSON
SELECT
table_schema,
table_name,
n_live_tup :: numeric as est_rows,
pg_table_size(relid):: numeric as table_size
FROM
information_schema.columns
JOIN pg_stat_user_tables as psut ON table_schema = psut.schemaname
AND table_name = psut.relname
LEFT OUTER JOIN pg_stats ON table_schema = pg_stats.schemaname
AND table_name = pg_stats.tablename
AND column_name = attname
WHERE
attname IS NULL
AND table_schema NOT IN (
'pg_catalog', 'information_schema'
)
GROUP BY
table_schema,
table_name,
relid,
n_live_tup
),
null_headers AS (
-- calculate null header sizes
-- omitting tables which dont have complete stats
-- and attributes which aren't visible
SELECT
hdr + 1 +(
sum(
case when null_frac <> 0 THEN 1 else 0 END
)/ 8
) as nullhdr,
SUM(
(1 - null_frac)* avg_width
) as datawidth,
MAX(null_frac) as maxfracsum,
schemaname,
tablename,
hdr,
ma,
bs
FROM
pg_stats CROSS
JOIN constants
LEFT OUTER JOIN no_stats ON schemaname = no_stats.table_schema
AND tablename = no_stats.table_name
WHERE
schemaname NOT IN (
'pg_catalog', 'information_schema'
)
AND no_stats.table_name IS NULL
AND EXISTS (
SELECT
1
FROM
information_schema.columns
WHERE
schemaname = columns.table_schema
AND tablename = columns.table_name
)
GROUP BY
schemaname,
tablename,
hdr,
ma,
bs
),
data_headers AS (
-- estimate header and row size
SELECT
ma,
bs,
hdr,
schemaname,
tablename,
(
datawidth +(
hdr + ma -(
case when hdr % ma = 0 THEN ma ELSE hdr % ma END
)
)
):: numeric AS datahdr,
(
maxfracsum *(
nullhdr + ma -(
case when nullhdr % ma = 0 THEN ma ELSE nullhdr % ma END
)
)
) AS nullhdr2
FROM
null_headers
),
table_estimates AS (
-- make estimates of how large the table should be
-- based on row and page size
SELECT
schemaname,
tablename,
bs,
reltuples :: numeric as est_rows,
relpages * bs as table_bytes,
CEIL(
(
reltuples * (
datahdr + nullhdr2 + 4 + ma - (
CASE WHEN datahdr % ma = 0 THEN ma ELSE datahdr % ma END
)
)/(bs - 20)
)
) * bs AS expected_bytes,
reltoastrelid
FROM
data_headers
JOIN pg_class ON tablename = relname
JOIN pg_namespace ON relnamespace = pg_namespace.oid
AND schemaname = nspname
WHERE
pg_class.relkind = 'r'
),
estimates_with_toast AS (
-- add in estimated TOAST table sizes
-- estimate based on 4 toast tuples per page because we dont have
-- anything better. also append the no_data tables
SELECT
schemaname,
tablename,
TRUE as can_estimate,
est_rows,
table_bytes + (
coalesce(toast.relpages, 0) * bs
) as table_bytes,
expected_bytes + (
ceil(
coalesce(toast.reltuples, 0) / 4
) * bs
) as expected_bytes
FROM
table_estimates
LEFT OUTER JOIN pg_class as toast ON table_estimates.reltoastrelid = toast.oid
AND toast.relkind = 't'
),
table_estimates_plus AS (
-- add some extra metadata to the table data
-- and calculations to be reused
-- including whether we cant estimate it
-- or whether we think it might be compressed
SELECT
current_database() as databasename,
schemaname,
tablename,
can_estimate,
est_rows,
CASE WHEN table_bytes > 0 THEN table_bytes :: NUMERIC ELSE NULL :: NUMERIC END AS table_bytes,
CASE WHEN expected_bytes > 0 THEN expected_bytes :: NUMERIC ELSE NULL :: NUMERIC END AS expected_bytes,
CASE WHEN expected_bytes > 0
AND table_bytes > 0
AND expected_bytes <= table_bytes THEN (table_bytes - expected_bytes):: NUMERIC ELSE 0 :: NUMERIC END AS bloat_bytes
FROM
estimates_with_toast
UNION ALL
SELECT
current_database() as databasename,
table_schema,
table_name,
FALSE,
est_rows,
table_size,
NULL :: NUMERIC,
NULL :: NUMERIC
FROM
no_stats
),
bloat_data AS (
-- do final math calculations and formatting
select
current_database() as databasename,
schemaname,
tablename,
can_estimate,
table_bytes,
round(
table_bytes /(1024 ^ 2):: NUMERIC,
3
) as table_mb,
expected_bytes,
round(
expected_bytes /(1024 ^ 2):: NUMERIC,
3
) as expected_mb,
round(bloat_bytes * 100 / table_bytes) as pct_bloat,
round(
bloat_bytes /(1024 :: NUMERIC ^ 2),
2
) as mb_bloat,
table_bytes,
expected_bytes,
est_rows
FROM
table_estimates_plus
) -- filter output for bloated tables
SELECT
databasename,
schemaname,
tablename,
can_estimate,
est_rows,
pct_bloat,
mb_bloat,
table_mb
FROM
bloat_data -- this where clause defines which tables actually appear
-- in the bloat chart
-- example below filters for tables which are either 50%
-- bloated and more than 20mb in size, or more than 25%
-- bloated and more than 1GB in size
WHERE
(
pct_bloat >= 0
AND mb_bloat >= 0
) -- OR ( pct_bloat >= 10 AND mb_bloat >= 100 )
--- where tablename='generalledgerdetail'
ORDER BY
mb_bloat DESC
limit
20;
No comments:
Post a Comment