Translate into your own language

Tuesday, January 13, 2026

Script: Automated Reindexing of Indexes with Bloat > 30%

 #!/bin/bash

set -euo pipefail


# ================= CONFIG =================

PGDATABASE="<dbname>"

LOGFILE="/tmp/reindex_concurrently_$(date +%F_%H%M).log"


# DRY RUN mode:

# true  = only print commands (no execution)

# false = execute REINDEX commands

DRY_RUN=true


# Login command (runs psql as postgres user)

psql="psql -XqtA -d $PGDATABASE"

# ==========================================


echo "===== REINDEX CONCURRENTLY STARTED: $(date) =====" | tee -a "$LOGFILE"

echo "DRY_RUN = $DRY_RUN" | tee -a "$LOGFILE"


$psql <<'EOF' |

WITH btree_index_atts AS (

    SELECT

        nspname,

        indexclass.relname AS index_name,

        indexclass.reltuples,

        indexclass.relpages,

        indrelid,

        indexrelid,

        indexclass.relam,

        tableclass.relname AS tablename,

        regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,

        indexrelid AS index_oid

    FROM pg_index

    JOIN pg_class AS indexclass ON pg_index.indexrelid = indexclass.oid

    JOIN pg_class AS tableclass ON pg_index.indrelid = tableclass.oid

    JOIN pg_namespace ON pg_namespace.oid = indexclass.relnamespace

    JOIN pg_am ON indexclass.relam = pg_am.oid

    WHERE pg_am.amname = 'btree'

      AND indexclass.relpages > 0

      AND nspname NOT IN ('pg_catalog','information_schema')

),

index_item_sizes AS (

    SELECT

        ind_atts.nspname,

        ind_atts.index_name,

        ind_atts.reltuples,

        ind_atts.relpages,

        ind_atts.relam,

        indrelid AS table_oid,

        index_oid,

        current_setting('block_size')::numeric AS bs,

        8 AS maxalign,

        24 AS pagehdr,

        CASE

            WHEN max(coalesce(pg_stats.null_frac,0)) = 0 THEN 2

            ELSE 6

        END AS index_tuple_hdr,

        SUM((1 - coalesce(pg_stats.null_frac, 0))

            * coalesce(pg_stats.avg_width, 1024)) AS nulldatawidth

    FROM pg_attribute

    JOIN btree_index_atts AS ind_atts

        ON pg_attribute.attrelid = ind_atts.indexrelid

       AND pg_attribute.attnum = ind_atts.attnum

    JOIN pg_stats

        ON pg_stats.schemaname = ind_atts.nspname

       AND (

            (pg_stats.tablename = ind_atts.tablename

             AND pg_stats.attname = pg_catalog.pg_get_indexdef(

                 pg_attribute.attrelid, pg_attribute.attnum, TRUE))

            OR

            (pg_stats.tablename = ind_atts.index_name

             AND pg_stats.attname = pg_attribute.attname)

        )

    WHERE pg_attribute.attnum > 0

    GROUP BY 1,2,3,4,5,6,7,8,9

),

index_aligned_est AS (

    SELECT

        maxalign, bs, nspname, index_name, reltuples,

        relpages, relam, table_oid, index_oid,

        COALESCE (

            CEIL (

                reltuples * (

                    6 + maxalign

                    - CASE

                        WHEN index_tuple_hdr % maxalign = 0 THEN maxalign

                        ELSE index_tuple_hdr % maxalign

                      END

                    + nulldatawidth

                    + maxalign

                    - CASE

                        WHEN nulldatawidth::int % maxalign = 0 THEN maxalign

                        ELSE nulldatawidth::int % maxalign

                      END

                )::numeric / (bs - pagehdr::numeric) + 1

            ),

            0

        ) AS expected

    FROM index_item_sizes

),

raw_bloat AS (

    SELECT

        nspname,

        index_name,

        bs * relpages::bigint AS totalbytes,

        expected,

        CASE

            WHEN relpages <= expected THEN 0

            ELSE (relpages - expected) * 100.0 / relpages

        END AS realbloat,

        bs * GREATEST(relpages - expected,0)::bigint AS wastedbytes

    FROM index_aligned_est

),

format_bloat AS (

    SELECT

        nspname AS schema_name,

        index_name,

        ROUND(realbloat) AS bloat_pct,

        ROUND(wastedbytes / (1024^3)::numeric, 3) AS bloat_gb

    FROM raw_bloat

)

SELECT

    'REINDEX INDEX CONCURRENTLY '

    || quote_ident(schema_name) || '.'

    || quote_ident(index_name) || ';'

FROM format_bloat

WHERE bloat_pct > 90

  AND bloat_gb > 0.01

ORDER BY bloat_gb DESC;

EOF

while read -r CMD

do

    [ -z "$CMD" ] && continue


    echo "--------------------------------------------" | tee -a "$LOGFILE"

    echo "Command: $CMD" | tee -a "$LOGFILE"


    if [ "$DRY_RUN" = true ]; then

        echo "DRY RUN: Not executing (just printing)" | tee -a "$LOGFILE"

    else

        echo "Executing..." | tee -a "$LOGFILE"

        $psql -c "$CMD" >> "$LOGFILE" 2>&1

        echo "Done." | tee -a "$LOGFILE"

        sleep 30

    fi

done


echo "===== REINDEX CONCURRENTLY COMPLETED: $(date) =====" | tee -a "$LOGFILE"

echo "Log file: $LOGFILE"


No comments:

Post a Comment