Skip to content

PostgreSQL 索引使用分析

索引使用统计

PostgreSQL提供了丰富的视图来监控索引的使用情况,主要通过pg_stat_user_indexespg_statio_user_indexes视图获取。

1. 查看索引使用情况

sql
-- 查看索引使用统计
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  idx_tup_read AS index_tuples_read,
  idx_tup_fetch AS index_tuples_fetched,
  idx_scan + idx_tup_read + idx_tup_fetch AS total_usage
FROM pg_stat_user_indexes
ORDER BY total_usage DESC;

-- 查看索引大小和IO统计
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size,
  idx_blks_read AS index_blocks_read,
  idx_blks_hit AS index_blocks_hit,
  CASE WHEN idx_blks_read + idx_blks_hit > 0 
    THEN idx_blks_hit / (idx_blks_read + idx_blks_hit)::numeric * 100 
    ELSE 0 
  END AS index_hit_rate
FROM pg_stat_user_indexes i
JOIN pg_statio_user_indexes s ON i.indexrelid = s.indexrelid
ORDER BY index_size DESC;

2. 计算索引使用效率

sql
-- 计算索引扫描与表扫描的比例
SELECT 
  t.schemaname,
  t.relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  t.seq_scan AS table_scans,
  CASE WHEN t.seq_scan > 0 
    THEN idx_scan / t.seq_scan::numeric * 100 
    ELSE 100 
  END AS index_to_table_ratio
FROM pg_stat_user_indexes i
JOIN pg_stat_user_tables t ON i.schemaname = t.schemaname AND i.relname = t.relname
ORDER BY index_to_table_ratio DESC;

-- 计算索引命中率
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  idx_tup_read AS index_tuples_read,
  idx_tup_fetch AS index_tuples_fetched,
  CASE WHEN idx_tup_read > 0 
    THEN idx_tup_fetch / idx_tup_read::numeric * 100 
    ELSE 0 
  END AS index_efficiency
FROM pg_stat_user_indexes
ORDER BY index_efficiency DESC;

未使用索引检测

识别未使用的索引可以帮助我们清理冗余索引,提高写入性能。

1. 查找未使用的索引

sql
-- 查找从未使用的索引(索引扫描次数为0)
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY index_size DESC;

-- 查找很少使用的索引(索引扫描次数<100)
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan < 100
  AND schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY index_size DESC;

2. 考虑保留的特殊索引

有些索引虽然使用频率低,但在特定场景下很重要,需要谨慎评估:

  • 用于唯一性约束的索引
  • 用于外键约束的索引
  • 用于定期批量查询的索引
  • 用于报表和数据分析的索引
sql
-- 检查索引是否与约束相关
SELECT 
  i.schemaname,
  i.relname AS table_name,
  i.indexrelname AS index_name,
  i.idx_scan AS index_scans,
  c.conname AS constraint_name,
  c.contype AS constraint_type,
  pg_size_pretty(pg_relation_size(i.indexrelid)) AS index_size
FROM pg_stat_user_indexes i
LEFT JOIN pg_constraint c ON i.indexrelid = c.conindid
WHERE i.idx_scan = 0
  AND i.schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY index_size DESC;

索引使用模式分析

1. 分析索引使用趋势

sql
-- 重置索引统计(谨慎使用,会丢失历史数据)
SELECT pg_stat_reset_indexes();

-- 或重置特定表的索引统计
SELECT pg_stat_reset_single_table_counters('table_name'::regclass);

-- 查看索引使用增长情况(需要定期采集数据)
-- 可以创建一个存储过程定期保存索引统计数据
CREATE TABLE index_usage_history (
  id SERIAL PRIMARY KEY,
  capture_time TIMESTAMP DEFAULT NOW(),
  schemaname TEXT,
  table_name TEXT,
  index_name TEXT,
  index_scans BIGINT,
  index_tuples_read BIGINT,
  index_tuples_fetched BIGINT,
  index_size BIGINT
);

-- 采集索引统计数据
INSERT INTO index_usage_history (
  schemaname,
  table_name,
  index_name,
  index_scans,
  index_tuples_read,
  index_tuples_fetched,
  index_size
)
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan,
  idx_tup_read,
  idx_tup_fetch,
  pg_relation_size(indexrelid) AS index_size
FROM pg_stat_user_indexes;

2. 分析查询计划中的索引使用

sql
-- 使用EXPLAIN ANALYZE分析查询如何使用索引
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';

-- 使用EXPLAIN (VERBOSE, COSTS, BUFFERS)获取更详细的信息
EXPLAIN (VERBOSE, COSTS, BUFFERS) SELECT * FROM orders WHERE user_id = 123 AND created_at > '2023-01-01';

-- 使用EXPLAIN (FORMAT JSON)获取JSON格式的执行计划
EXPLAIN (FORMAT JSON) SELECT * FROM products WHERE category_id = 5 ORDER BY price DESC LIMIT 10;

索引效率优化

1. 识别低效索引

sql
-- 识别低效索引(高扫描但低命中率)
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  idx_tup_read AS index_tuples_read,
  idx_tup_fetch AS index_tuples_fetched,
  CASE WHEN idx_tup_read > 0 
    THEN idx_tup_fetch / idx_tup_read::numeric * 100 
    ELSE 0 
  END AS index_efficiency,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_tup_read > 1000
  AND (idx_tup_fetch / idx_tup_read::numeric * 100) < 10
  AND schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY index_scans DESC;

2. 优化索引设计

前缀索引优化

sql
-- 对于长字符串,可以考虑使用前缀索引
CREATE INDEX idx_users_email_prefix ON users (email(20));

-- 测试前缀索引效果
EXPLAIN ANALYZE SELECT * FROM users WHERE email LIKE 'user%';

复合索引顺序优化

sql
-- 复合索引顺序应遵循选择性原则(选择性高的列放在前面)
-- 不好的设计:低选择性列在前
CREATE INDEX idx_orders_status_date ON orders (status, created_at);

-- 好的设计:高选择性列在前
CREATE INDEX idx_orders_date_status ON orders (created_at, status);

覆盖索引优化

sql
-- 覆盖索引包含查询所需的所有列,避免回表操作
-- 原查询
EXPLAIN ANALYZE SELECT id, name, price FROM products WHERE category_id = 5;

-- 创建覆盖索引
CREATE INDEX idx_products_category_id_covering ON products (category_id) INCLUDE (name, price);

-- 再次分析查询
EXPLAIN ANALYZE SELECT id, name, price FROM products WHERE category_id = 5;

索引使用监控与告警

1. 设置索引使用监控

sql
-- 创建索引使用监控视图
CREATE OR REPLACE VIEW v_index_usage_report AS
SELECT 
  schemaname,
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  seq_scan AS table_scans,
  idx_tup_read AS index_tuples_read,
  idx_tup_fetch AS index_tuples_fetched,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size,
  CASE WHEN seq_scan + idx_scan > 0 
    THEN idx_scan / (seq_scan + idx_scan)::numeric * 100 
    ELSE 0 
  END AS index_usage_percentage,
  CASE WHEN idx_tup_read > 0 
    THEN idx_tup_fetch / idx_tup_read::numeric * 100 
    ELSE 0 
  END AS index_efficiency
FROM pg_stat_user_indexes i
JOIN pg_stat_user_tables t ON i.schemaname = t.schemaname AND i.relname = t.relname
WHERE i.schemaname NOT IN ('pg_catalog', 'information_schema');

-- 使用视图生成报告
SELECT * FROM v_index_usage_report ORDER BY index_usage_percentage ASC;

2. 配置索引使用告警

可以结合Prometheus+Grafana设置以下告警:

  • 索引使用百分比低于10%
  • 未使用的索引超过10个
  • 索引大小超过表大小的50%
  • 索引扫描次数为0但大小超过100MB

最佳实践

1. 定期分析索引使用情况

  • 每周或每月生成索引使用报告
  • 分析索引使用趋势
  • 清理不必要的索引
  • 优化低效索引

2. 索引设计原则

  • 只为经常用于查询条件、排序和分组的列创建索引
  • 复合索引顺序遵循选择性原则
  • 避免创建太多索引,影响写入性能
  • 考虑使用覆盖索引减少回表操作
  • 定期重建碎片化严重的索引

3. 监控与维护

  • 监控索引使用统计
  • 监控索引大小和增长趋势
  • 定期重建或重索引
  • 保持统计信息更新
sql
-- 更新表统计信息
ANALYZE VERBOSE table_name;

-- 或更新数据库所有表统计信息
ANALYZE VERBOSE;

-- 重建索引
REINDEX INDEX index_name;

-- 或重建表所有索引
REINDEX TABLE table_name;

-- 重建数据库所有索引
REINDEX DATABASE database_name;

常见问题(FAQ)

Q1:如何查看索引是否被使用?

A1:可以通过查询pg_stat_user_indexes视图查看索引的使用情况:

sql
SELECT 
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans
FROM pg_stat_user_indexes
WHERE schemaname = 'public';

Q2:如何识别未使用的索引?

A2:查找idx_scan为0或很低的索引:

sql
SELECT 
  relname AS table_name,
  indexrelname AS index_name,
  idx_scan AS index_scans,
  pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
  AND schemaname = 'public'
ORDER BY index_size DESC;

Q3:未使用的索引都应该删除吗?

A3:不一定,有些索引虽然使用频率低,但在特定场景下很重要:

  • 用于唯一性约束的索引
  • 用于外键约束的索引
  • 用于定期批量查询的索引
  • 用于报表和数据分析的索引

Q4:如何优化索引使用效率?

A4:优化方法:

  • 调整复合索引的列顺序
  • 使用覆盖索引减少回表操作
  • 考虑使用前缀索引处理长字符串
  • 定期更新统计信息
  • 重建碎片化严重的索引

Q5:如何分析索引使用趋势?

A5:可以创建一个历史表定期保存索引统计数据,然后分析使用趋势:

sql
-- 创建历史表
CREATE TABLE index_usage_history (
  id SERIAL PRIMARY KEY,
  capture_time TIMESTAMP DEFAULT NOW(),
  table_name TEXT,
  index_name TEXT,
  index_scans BIGINT,
  index_size BIGINT
);

-- 定期插入数据
INSERT INTO index_usage_history (table_name, index_name, index_scans, index_size)
SELECT 
  relname, 
  indexrelname, 
  idx_scan, 
  pg_relation_size(indexrelid)
FROM pg_stat_user_indexes
WHERE schemaname = 'public';

Q6:如何重置索引统计?

A6:可以使用以下命令重置索引统计:

sql
-- 重置所有索引统计
SELECT pg_stat_reset_indexes();

-- 重置特定表的索引统计
SELECT pg_stat_reset_single_table_counters('table_name'::regclass);

Q7:覆盖索引有什么优势?

A7:覆盖索引包含查询所需的所有列,避免了回表操作,提高查询性能。例如:

sql
-- 创建覆盖索引
CREATE INDEX idx_products_category_id_covering ON products (category_id) INCLUDE (name, price);

-- 查询可以直接使用索引,无需回表
SELECT id, name, price FROM products WHERE category_id = 5;

Q8:如何选择复合索引的列顺序?

A8:复合索引的列顺序应遵循选择性原则,选择性高的列放在前面。例如:

  • 不好的设计:(status, created_at)(status选择性低)
  • 好的设计:(created_at, status)(created_at选择性高)