Skip to content

PostgreSQL 状态查看与诊断

状态查看概述

PostgreSQL提供了丰富的内置视图和工具,用于查看数据库的各种状态信息。这些信息对于DBA进行日常运维、性能优化和故障诊断至关重要。

系统状态查看

1. 数据库版本信息

sql
-- 查看完整版本信息
SELECT version();

-- 查看主版本号
SELECT current_setting('server_version_num')::int;

2. 数据库启动时间

sql
SELECT pg_postmaster_start_time();

3. 配置参数

sql
-- 查看所有配置参数
SELECT name, setting, unit, category, short_desc 
FROM pg_settings 
ORDER BY category;

-- 查看特定参数
SELECT name, setting, short_desc 
FROM pg_settings 
WHERE name IN ('shared_buffers', 'work_mem', 'maintenance_work_mem');

-- 查看被修改的参数
SELECT name, setting, boot_val, reset_val 
FROM pg_settings 
WHERE setting != boot_val;

4. 系统资源使用情况

sql
-- 查看系统负载(PostgreSQL 9.2+)
SELECT * FROM pg_stat_sys_cpu;

-- 查看内存使用情况(PostgreSQL 14+)
SELECT * FROM pg_stat_sys_memory;

5. 数据库空间使用情况

sql
-- 查看数据库大小
SELECT datname, pg_size_pretty(pg_database_size(datname)) AS size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;

-- 查看表大小(包括索引)
SELECT schemaname, tablename, 
       pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS total_size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname || '.' || tablename) DESC;

-- 查看索引大小
SELECT schemaname, indexname, tablename,
       pg_size_pretty(pg_indexes_size(schemaname || '.' || tablename)) AS index_size
FROM pg_indexes
ORDER BY pg_indexes_size(schemaname || '.' || tablename) DESC;

会话状态查看

1. 当前活跃会话

sql
-- 查看所有活跃会话
SELECT pid, datname, usename, application_name, client_addr, 
       state, query_start, query
FROM pg_stat_activity
WHERE state = 'active';

-- 查看长时间运行的会话
SELECT pid, datname, usename, application_name, client_addr, 
       state, query_start, 
       NOW() - query_start AS duration,
       query
FROM pg_stat_activity
WHERE state = 'active'
  AND NOW() - query_start > interval '5 minutes'
ORDER BY duration DESC;

2. 会话等待状态

sql
-- 查看会话等待事件(PostgreSQL 9.6+)
SELECT pid, datname, usename, application_name, client_addr, 
       state, wait_event_type, wait_event,
       NOW() - query_start AS duration,
       query
FROM pg_stat_activity
WHERE wait_event IS NOT NULL
ORDER BY duration DESC;

-- 查看锁等待事件
SELECT pid, datname, usename, application_name, client_addr, 
       state, wait_event_type, wait_event,
       NOW() - query_start AS duration,
       query
FROM pg_stat_activity
WHERE wait_event_type = 'Lock'
ORDER BY duration DESC;

3. 终止会话

sql
-- 终止指定PID的会话
SELECT pg_terminate_backend(12345);

-- 终止特定数据库的所有会话
SELECT pg_terminate_backend(pid)
FROM pg_stat_activity
WHERE datname = 'mydb' AND pid != pg_backend_pid();

事务状态查看

1. 当前活跃事务

sql
-- 查看活跃事务
SELECT pid, datname, usename, application_name, client_addr, 
       xact_start, query_start, state,
       NOW() - xact_start AS xact_duration,
       query
FROM pg_stat_activity
WHERE xact_start IS NOT NULL
ORDER BY xact_duration DESC;

-- 查看长时间运行的事务
SELECT pid, datname, usename, application_name, client_addr, 
       xact_start, query_start, state,
       NOW() - xact_start AS xact_duration,
       query
FROM pg_stat_activity
WHERE xact_start IS NOT NULL
  AND NOW() - xact_start > interval '10 minutes'
ORDER BY xact_duration DESC;

2. 事务统计信息

sql
-- 查看事务提交和回滚统计
SELECT datname, xact_commit, xact_rollback,
       ROUND(xact_rollback::numeric / NULLIF(xact_commit + xact_rollback, 0) * 100, 2) AS rollback_ratio
FROM pg_stat_database
ORDER BY datname;

3. 事务ID回卷风险

sql
-- 查看数据库最老的事务ID
SELECT datname, age(datfrozenxid) AS txid_age
FROM pg_database
ORDER BY txid_age DESC;

-- 查看表最老的事务ID
SELECT schemaname, tablename, age(relfrozenxid) AS txid_age
FROM pg_class
WHERE relkind = 'r'
ORDER BY txid_age DESC
LIMIT 10;

锁状态查看

1. 锁概述

PostgreSQL的锁类型包括:

  • ACCESS SHARE (S)
  • ROW SHARE (RS)
  • ROW EXCLUSIVE (RX)
  • SHARE UPDATE EXCLUSIVE (SU)
  • SHARE (S)
  • SHARE ROW EXCLUSIVE (SRX)
  • EXCLUSIVE (X)
  • ACCESS EXCLUSIVE (AX)

2. 查看锁信息

sql
-- 查看锁信息
SELECT locktype, database, relation::regclass, page, tuple, 
       virtualxid, transactionid, classid, objid, objsubid,
       pid, mode, granted, fastpath
FROM pg_locks
WHERE pid != pg_backend_pid();

-- 查看锁与会话关联信息
SELECT l.locktype, l.database, l.relation::regclass, l.page, l.tuple, 
       l.virtualxid, l.transactionid, l.mode, l.granted,
       a.pid, a.datname, a.usename, a.application_name, a.client_addr,
       a.state, a.query_start, a.query
FROM pg_locks l
JOIN pg_stat_activity a ON l.pid = a.pid
WHERE a.pid != pg_backend_pid()
ORDER BY a.query_start;

3. 锁等待链分析

sql
-- 查看锁等待链
WITH RECURSIVE lock_chain AS (
    SELECT 
        l1.pid AS waiting_pid,
        l1.mode AS waiting_mode,
        l1.locktype AS waiting_locktype,
        l1.relation::regclass AS waiting_relation,
        l1.transactionid AS waiting_txid,
        l2.pid AS blocking_pid,
        l2.mode AS blocking_mode,
        l2.locktype AS blocking_locktype,
        l2.relation::regclass AS blocking_relation,
        l2.transactionid AS blocking_txid,
        ARRAY[l1.pid] AS path
    FROM pg_locks l1
    JOIN pg_locks l2 ON (
        (l1.locktype = l2.locktype AND 
         l1.database = l2.database AND 
         l1.relation = l2.relation AND 
         l1.page = l2.page AND 
         l1.tuple = l2.tuple AND 
         l1.virtualxid = l2.virtualxid AND 
         l1.transactionid = l2.transactionid AND 
         l1.classid = l2.classid AND 
         l1.objid = l2.objid AND 
         l1.objsubid = l2.objsubid) OR
        (l1.locktype = 'transactionid' AND l1.transactionid = l2.transactionid)
    )
    WHERE l1.granted = false AND l2.granted = true
    
    UNION ALL
    
    SELECT 
        lc.waiting_pid,
        lc.waiting_mode,
        lc.waiting_locktype,
        lc.waiting_relation,
        lc.waiting_txid,
        l2.pid AS blocking_pid,
        l2.mode AS blocking_mode,
        l2.locktype AS blocking_locktype,
        l2.relation::regclass AS blocking_relation,
        l2.transactionid AS blocking_txid,
        lc.path || l2.pid AS path
    FROM lock_chain lc
    JOIN pg_locks l1 ON lc.blocking_pid = l1.pid AND l1.granted = true
    JOIN pg_locks l2 ON (
        (l1.locktype = l2.locktype AND 
         l1.database = l2.database AND 
         l1.relation = l2.relation AND 
         l1.page = l2.page AND 
         l1.tuple = l2.tuple AND 
         l1.virtualxid = l2.virtualxid AND 
         l1.transactionid = l2.transactionid AND 
         l1.classid = l2.classid AND 
         l1.objid = l2.objid AND 
         l1.objsubid = l2.objsubid) OR
        (l1.locktype = 'transactionid' AND l1.transactionid = l2.transactionid)
    )
    WHERE l2.granted = true AND NOT l2.pid = ANY(lc.path)
)
SELECT 
    waiting_pid, 
    waiting_mode, 
    waiting_locktype, 
    waiting_relation, 
    waiting_txid, 
    blocking_pid, 
    blocking_mode, 
    blocking_locktype, 
    blocking_relation, 
    blocking_txid,
    path
FROM lock_chain;

复制状态查看

1. 主从复制状态

sql
-- 在主库查看复制槽状态
SELECT slot_name, plugin, slot_type, datoid, database, active, 
       xmin, catalog_xmin, restart_lsn, confirmed_flush_lsn
FROM pg_replication_slots;

-- 在主库查看连接的从库
SELECT client_addr, client_hostname, client_port, backend_start, 
       state, sent_lsn, write_lsn, flush_lsn, replay_lsn, 
       sync_priority, sync_state
FROM pg_stat_replication;

-- 计算复制延迟
SELECT client_addr, 
       state, 
       sent_lsn, write_lsn, flush_lsn, replay_lsn, 
       (sent_lsn - replay_lsn)::text AS replay_delay_bytes,
       pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_delay_b,
       (extract(epoch from now()) - extract(epoch from backend_start))::int AS connected_seconds
FROM pg_stat_replication;

2. 在从库查看复制状态

sql
-- 查看从库复制状态
SELECT pg_is_in_recovery(), 
       pg_last_wal_receive_lsn(), 
       pg_last_wal_replay_lsn(),
       pg_last_xact_replay_timestamp(),
       pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn()) AS replay_lag_bytes,
       CASE WHEN pg_last_xact_replay_timestamp() IS NOT NULL 
            THEN now() - pg_last_xact_replay_timestamp() 
            ELSE NULL 
       END AS replay_lag_time;

-- 查看recovery.conf或recovery.signal配置
SHOW primary_conninfo;
SHOW primary_slot_name;

3. 逻辑复制状态

sql
-- 查看发布者状态
SELECT * FROM pg_publication;
SELECT * FROM pg_publication_tables;

-- 查看订阅者状态
SELECT * FROM pg_subscription;
SELECT * FROM pg_stat_subscription;

性能状态查看

1. 查询执行统计

sql
-- 查看慢查询统计(需要pg_stat_statements扩展)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

SELECT queryid, query, calls, total_exec_time, mean_exec_time, 
       min_exec_time, max_exec_time, rows, shared_blks_hit, 
       shared_blks_read, shared_blks_dirtied, shared_blks_written,
       local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written,
       temp_blks_read, temp_blks_written, blk_read_time, blk_write_time
FROM pg_stat_statements 
ORDER BY total_exec_time DESC
LIMIT 10;

2. 索引使用情况

sql
-- 查看索引使用情况
SELECT schemaname, relname, indexrelname, idx_scan, idx_tup_read, 
       idx_tup_fetch, idx_blks_read, idx_blks_hit
FROM pg_stat_user_indexes ui
JOIN pg_statio_user_indexes si ON ui.indexrelid = si.indexrelid
ORDER BY idx_scan ASC;

-- 查看未使用的索引
SELECT schemaname, relname, indexrelname, idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY relname, indexrelname;

3. 表使用情况

sql
-- 查看表使用情况
SELECT schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch,
       n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd,
       n_live_tup, n_dead_tup, last_vacuum, last_autovacuum,
       last_analyze, last_autoanalyze
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;

-- 查看需要VACUUM的表
SELECT schemaname, relname, n_live_tup, n_dead_tup,
       round(n_dead_tup::numeric / NULLIF(n_live_tup, 0) * 100, 2) AS dead_tup_ratio
FROM pg_stat_user_tables
WHERE n_dead_tup > 1000
  AND round(n_dead_tup::numeric / NULLIF(n_live_tup, 0) * 100, 2) > 10
ORDER BY dead_tup_ratio DESC;

4. 缓冲区使用情况

sql
-- 查看缓冲区命中率
SELECT 
    sum(heap_blks_hit) as heap_hit,
    sum(heap_blks_read) as heap_read,
    sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read))::numeric as heap_hit_ratio,
    sum(idx_blks_hit) as idx_hit,
    sum(idx_blks_read) as idx_read,
    sum(idx_blks_hit) / (sum(idx_blks_hit) + sum(idx_blks_read))::numeric as idx_hit_ratio,
    sum(toast_blks_hit) as toast_hit,
    sum(toast_blks_read) as toast_read,
    sum(toast_blks_hit) / (sum(toast_blks_hit) + sum(toast_blks_read))::numeric as toast_hit_ratio,
    sum(tidx_blks_hit) as tidx_hit,
    sum(tidx_blks_read) as tidx_read,
    sum(tidx_blks_hit) / (sum(tidx_blks_hit) + sum(tidx_blks_read))::numeric as tidx_hit_ratio
FROM pg_statio_user_tables;

检查点和WAL状态

sql
-- 查看检查点信息
SELECT * FROM pg_stat_checkpoints;

-- 查看WAL写入统计
SELECT * FROM pg_stat_wal;

-- 查看WAL缓冲区统计
SELECT * FROM pg_stat_bgwriter;

死锁和冲突信息

sql
-- 查看死锁统计
SELECT * FROM pg_stat_database_conflicts;

-- 查看最后一次死锁信息(需要配置log_lock_waits和log_statement='deadlock')
-- 从错误日志中查看

不同PostgreSQL版本的状态查看差异

PostgreSQL 9.x

  • 有限的等待事件类型
  • 不支持pg_stat_sys_cpu和pg_stat_sys_memory
  • 逻辑复制功能有限
  • 慢查询统计功能较弱

PostgreSQL 10+

  • 增强的等待事件类型
  • 支持pg_stat_statements的更多指标
  • 改进的复制状态视图
  • 支持逻辑复制

PostgreSQL 12+

  • 增强的锁等待信息
  • 支持pg_stat_wal视图
  • 改进的VACUUM统计
  • 支持并行查询统计

PostgreSQL 14+

  • 增强的系统资源统计(pg_stat_sys_memory)
  • 改进的复制延迟计算
  • 支持pg_stat_progress_vacuum的更多指标
  • 增强的死锁检测

常用状态查看工具

1. 内置命令行工具

  • psql:交互式命令行工具,支持执行SQL查询
  • pg_controldata:查看数据库控制信息
  • pg_waldump:解析WAL日志
  • pg_basebackup:创建基础备份

2. 第三方工具

  • pgAdmin:图形化管理工具,提供状态查看界面
  • pg_top:实时监控PostgreSQL进程
  • pg_stat_monitor:增强版的pg_stat_statements
  • Prometheus + Grafana:监控和可视化PostgreSQL状态

状态查看最佳实践

1. 定期检查

  • 每日检查数据库空间使用情况
  • 每周检查长事务和慢查询
  • 每月检查索引使用情况和VACUUM状态

2. 建立基准

  • 记录正常状态下的各项指标
  • 建立告警阈值
  • 定期比较指标变化

3. 综合分析

  • 结合日志分析和状态查看
  • 考虑系统资源和应用负载
  • 综合判断问题原因

4. 自动化监控

  • 使用监控工具自动收集状态信息
  • 设置告警规则
  • 生成定期报告

常见问题诊断示例

1. 数据库响应缓慢

sql
-- 查看活跃会话和等待事件
SELECT pid, datname, usename, application_name, client_addr, 
       state, wait_event_type, wait_event,
       NOW() - query_start AS duration,
       query
FROM pg_stat_activity
WHERE state = 'active'
ORDER BY duration DESC;

-- 查看锁等待
SELECT * FROM pg_locks WHERE granted = false;

-- 查看慢查询
SELECT * FROM pg_stat_statements ORDER BY total_exec_time DESC LIMIT 10;

2. 复制延迟

sql
-- 在主库查看复制状态
SELECT client_addr, state, sent_lsn, write_lsn, flush_lsn, replay_lsn,
       pg_wal_lsn_diff(sent_lsn, replay_lsn) AS replay_delay_b
FROM pg_stat_replication;

-- 在从库查看复制状态
SELECT pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn(),
       pg_last_xact_replay_timestamp(),
       now() - pg_last_xact_replay_timestamp() AS replay_lag_time;

3. 空间不足

sql
-- 查看数据库大小
SELECT datname, pg_size_pretty(pg_database_size(datname)) AS size
FROM pg_database
ORDER BY pg_database_size(datname) DESC;

-- 查看大表
SELECT schemaname, tablename, 
       pg_size_pretty(pg_total_relation_size(schemaname || '.' || tablename)) AS total_size
FROM pg_tables
ORDER BY pg_total_relation_size(schemaname || '.' || tablename) DESC
LIMIT 20;

-- 查看WAL文件大小
SELECT pg_size_pretty(pg_wal_lsn_diff(pg_current_wal_lsn(), '0/0')) AS wal_size;

总结

PostgreSQL提供了丰富的状态查看功能,DBA需要熟练掌握这些功能,以便快速诊断和解决数据库问题。定期检查数据库状态,建立基准指标,结合自动化监控工具,可以有效提高数据库的可靠性和性能。不同PostgreSQL版本的状态查看功能有所差异,DBA需要根据实际版本选择合适的查看方法。