外观
SQLServer 监控诊断命令
动态管理视图 (DMVs) 和动态管理函数 (DMFs)
系统状态监控
生产场景示例:实时监控SQL Server服务器的基本状态,包括版本、内存使用和活动会话,用于日常健康检查。
sql
-- 查看系统基本信息
SELECT @@VERSION AS SQLServerVersion;
SELECT @@SERVERNAME AS ServerName;
SELECT DB_NAME() AS CurrentDatabase;
-- 查看服务器状态
SELECT
cpu_count AS LogicalCPUCount,
hyperthread_ratio AS HyperthreadRatio,
physical_memory_kb / 1024 AS PhysicalMemoryMB,
virtual_memory_kb / 1024 AS VirtualMemoryMB,
committed_kb / 1024 AS CommittedMemoryMB,
committed_target_kb / 1024 AS CommittedTargetMB,
sqlserver_start_time AS SQLServerStartTime
FROM sys.dm_os_sys_info;
-- 查看当前活动会话
SELECT
session_id,
status,
login_name,
host_name,
program_name,
command,
database_id,
cpu_time,
memory_usage,
reads,
writes,
logical_reads
FROM sys.dm_exec_sessions
WHERE is_user_process = 1;
-- 查看当前请求
SELECT
r.session_id,
r.status,
r.command,
r.cpu_time,
r.reads,
r.writes,
r.logical_reads,
r.row_count,
r.total_elapsed_time,
t.text AS SqlText
FROM sys.dm_exec_requests r
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
WHERE r.session_id > 50; -- 排除系统会话性能监控
生产场景示例:监控SQL Server性能瓶颈,包括等待统计信息、CPU密集型查询和IO密集型查询,用于性能调优。
sql
-- 查看等待统计信息
SELECT
wait_type,
waiting_tasks_count,
wait_time_ms,
max_wait_time_ms,
signal_wait_time_ms
FROM sys.dm_os_wait_stats
ORDER BY wait_time_ms DESC;
-- 查看当前等待的任务
SELECT
wt.session_id,
wt.wait_type,
wt.wait_duration_ms,
wt.blocking_session_id,
wt.resource_description,
t.text AS SqlText
FROM sys.dm_os_waiting_tasks wt
JOIN sys.dm_exec_requests r ON wt.session_id = r.session_id
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t;
-- 查看CPU使用率
SELECT
TOP 10
total_worker_time / 1000 AS TotalCPUTimeMS,
execution_count,
total_worker_time / execution_count / 1000 AS AvgCPUTimeMS,
t.text AS SqlText
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
ORDER BY total_worker_time DESC;
-- 查看IO密集型查询
SELECT
TOP 10
(total_logical_reads + total_logical_writes) AS TotalIOMB,
execution_count,
(total_logical_reads + total_logical_writes) / execution_count AS AvgIOMB,
t.text AS SqlText
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
ORDER BY (total_logical_reads + total_logical_writes) DESC;内存使用监控
生产场景示例:监控SQL Server内存使用情况,包括内存分配、缓冲池使用和内存授予,用于内存配置优化。
sql
-- 查看内存使用情况
SELECT
type,
name,
SUM(pages_kb) / 1024 AS MemoryMB
FROM sys.dm_os_memory_clerks
GROUP BY type, name
ORDER BY SUM(pages_kb) DESC;
-- 查看缓冲池使用情况
SELECT
db_name(database_id) AS DatabaseName,
COUNT(*) * 8 / 1024 AS BufferPoolMB
FROM sys.dm_os_buffer_descriptors
GROUP BY database_id
ORDER BY BufferPoolMB DESC;
-- 查看单个数据库的缓冲池使用
SELECT
OBJECT_NAME(p.object_id) AS TableName,
COUNT(*) * 8 / 1024 AS BufferPoolMB
FROM sys.dm_os_buffer_descriptors bd
JOIN sys.allocation_units au ON bd.allocation_unit_id = au.allocation_unit_id
JOIN sys.partitions p ON au.container_id = p.hobt_id
WHERE bd.database_id = DB_ID() AND p.object_id > 100
GROUP BY p.object_id
ORDER BY BufferPoolMB DESC;
-- 查看内存授予情况
SELECT
session_id,
request_id,
granted_memory_kb / 1024 AS GrantedMemoryMB,
requested_memory_kb / 1024 AS RequestedMemoryMB,
required_memory_kb / 1024 AS RequiredMemoryMB,
query_cost,
timeout_sec,
resource_semaphore_id
FROM sys.dm_exec_query_memory_grants;索引和查询性能
生产场景示例:监控索引使用情况、缺失索引和索引碎片,用于索引优化。
sql
-- 查看缺失索引
SELECT
migs.avg_total_user_cost * (migs.avg_user_impact / 100.0) * (migs.user_seeks + migs.user_scans) AS ImprovementMeasure,
OBJECT_NAME(mid.object_id) AS TableName,
'CREATE INDEX [IX_' + OBJECT_NAME(mid.object_id) + '_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.equality_columns, ''), ', ', '_'), '[', ''), ']', '') + CASE
WHEN mid.equality_columns IS NOT NULL AND mid.inequality_columns IS NOT NULL THEN '_' ELSE '' END + REPLACE(REPLACE(REPLACE(ISNULL(mid.inequality_columns, ''), ', ', '_'), '[', ''), ']', '') + ']' + ' ON ' + mid.statement + ' (' + ISNULL(mid.equality_columns, '') + CASE WHEN mid.equality_columns IS NOT NULL AND mid.inequality_columns IS NOT NULL THEN ', ' ELSE '' END + ISNULL(mid.inequality_columns, '') + ')' + ISNULL(' INCLUDE (' + mid.included_columns + ')', '') AS CreateIndexStatement
FROM sys.dm_db_missing_index_groups mig
JOIN sys.dm_db_missing_index_group_stats migs ON mig.index_group_handle = migs.group_handle
JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
ORDER BY ImprovementMeasure DESC;
-- 查看索引使用情况
SELECT
OBJECT_NAME(i.object_id) AS TableName,
i.name AS IndexName,
i.type_desc AS IndexType,
us.user_seeks,
us.user_scans,
us.user_lookups,
us.user_updates,
us.last_user_seek,
us.last_user_scan,
us.last_user_lookup,
us.last_user_update
FROM sys.indexes i
LEFT JOIN sys.dm_db_index_usage_stats us ON i.object_id = us.object_id AND i.index_id = us.index_id AND us.database_id = DB_ID()
WHERE i.object_id > 100 -- 排除系统表
ORDER BY OBJECT_NAME(i.object_id), i.name;
-- 查看索引碎片
SELECT
OBJECT_NAME(object_id) AS TableName,
name AS IndexName,
index_type_desc AS IndexType,
avg_fragmentation_in_percent AS FragmentationPercent,
page_count AS PageCount
FROM sys.dm_db_index_physical_stats(
DB_ID(), OBJECT_ID('TableName'), NULL, NULL, 'LIMITED');系统存储过程和函数
系统信息查询
生产场景示例:查询SQL Server配置、数据库信息和表结构,用于系统管理和故障诊断。
sql
-- 查看服务器配置
EXEC sp_configure;
-- 查看高级配置
EXEC sp_configure 'show advanced options', 1;
RECONFIGURE;
EXEC sp_configure;
-- 查看数据库信息
EXEC sp_helpdb;
EXEC sp_helpdb 'DatabaseName';
-- 查看表信息
EXEC sp_help 'TableName';
EXEC sp_columns 'TableName';
EXEC sp_helpindex 'TableName';
-- 查看作业信息
EXEC msdb.dbo.sp_help_job;
EXEC msdb.dbo.sp_help_jobhistory @job_name = 'JobName';性能相关存储过程
生产场景示例:查看当前活动进程、锁信息和对象使用情况,用于性能监控和故障诊断。
sql
-- 查看当前活动进程
EXEC sp_who;
EXEC sp_who2;
EXEC sp_who2 'active';
-- 查看当前锁信息
EXEC sp_lock;
-- SQL Server 2005+ 推荐使用动态管理视图
SELECT * FROM sys.dm_tran_locks;
-- 查看对象使用情况
EXEC sp_spaceused 'TableName';
EXEC sp_spaceused 'TableName', 'true'; -- 带更新统计
-- 查看缓存使用情况
EXEC sp_cacheinfo;
-- 清除缓存
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;错误日志查询
生产场景示例:查询SQL Server错误日志和SQL Server Agent日志,用于故障诊断和系统监控。
sql
-- 查看错误日志
EXEC xp_readerrorlog;
EXEC xp_readerrorlog 0, 1, 'Error'; -- 搜索包含"Error"的错误日志
EXEC xp_readerrorlog 0, 1, NULL, NULL, '2023-12-01', '2023-12-31'; -- 指定日期范围
-- 查看SQL Server Agent日志
EXEC msdb.dbo.sp_readerrorlog @p1 = 1; -- 1 表示SQL Server Agent日志
-- 循环错误日志
EXEC sp_cycle_errorlog;
EXEC msdb.dbo.sp_cycle_agent_errorlog; -- 循环SQL Server Agent日志DBCC 命令
数据库维护命令
生产场景示例:检查数据库完整性、表完整性和索引完整性,用于数据库维护和故障诊断。
sql
-- 检查数据库完整性
DBCC CHECKDB ('DatabaseName') WITH NO_INFOMSGS, ALL_ERRORMSGS;
-- 检查表完整性
DBCC CHECKTABLE ('TableName') WITH NO_INFOMSGS;
-- 检查索引完整性
DBCC CHECKINDEX ('TableName') WITH NO_INFOMSGS;
-- 检查文件组完整性
DBCC CHECKFILEGROUP ('PRIMARY') WITH NO_INFOMSGS;
-- 更新统计信息
DBCC UPDATEUSAGE ('DatabaseName');
DBCC UPDATEUSAGE ('DatabaseName', 'TableName');性能相关命令
生产场景示例:查看事务日志空间使用情况、IO统计信息和等待统计信息,用于性能监控和故障诊断。
sql
-- 查看事务日志空间使用情况
DBCC SQLPERF(LOGSPACE);
-- 查看IO统计信息
DBCC SQLPERF('IOSTATS');
-- 查看 latch 统计信息
DBCC SQLPERF('LATCHSTATS');
-- 查看等待统计信息
DBCC SQLPERF('WAITSTATS');
-- 重置等待统计信息
DBCC SQLPERF('WAITSTATS', CLEAR);内存相关命令
生产场景示例:查看内存状态、缓冲池使用情况和清除缓存,用于内存监控和故障诊断。
sql
-- 查看内存状态
DBCC MEMORYSTATUS;
-- 查看缓冲池使用情况
DBCC SHOWBUFFERPOOLUSAGE;
-- 清除缓冲池
DBCC DROPCLEANBUFFERS;
-- 清除计划缓存
DBCC FREEPROCCACHE;
-- 清除特定计划缓存
DBCC FREEPROCCACHE (plan_handle);
-- 清除会话缓存
DBCC FREESESSIONCACHE;
-- 清除系统缓存
DBCC FREESYSTEMCACHE ('ALL');其他常用 DBCC 命令
生产场景示例:查看会话SQL语句、死锁信息和数据库文件大小,用于故障诊断和系统监控。
sql
-- 查看当前会话ID
DBCC INPUTBUFFER(@@SPID);
-- 查看指定会话的SQL语句
DBCC INPUTBUFFER(session_id);
-- 查看死锁信息(需要启用跟踪标志1222或1448)
DBCC TRACEON(1222, -1); -- 启用死锁跟踪
DBCC TRACESTATUS(1222, -1); -- 查看跟踪状态
DBCC TRACEOFF(1222, -1); -- 关闭死锁跟踪
-- 查看数据库文件大小和空间使用
DBCC SHOWFILESTATS;
-- 收缩数据库
DBCC SHRINKDATABASE ('DatabaseName', 10); -- 收缩到10%可用空间
-- 收缩文件
DBCC SHRINKFILE ('FileName', 500); -- 收缩到500MBSQL Server Agent 命令
生产场景示例:管理SQL Server Agent作业,包括查看作业列表、作业步骤和作业历史,用于作业监控和管理。
sql
-- 查看作业列表
SELECT
job_id,
name,
enabled,
description,
date_created,
date_modified
FROM msdb.dbo.sysjobs;
-- 查看作业步骤
SELECT
j.name AS JobName,
js.step_id,
js.step_name,
js.subsystem,
js.command,
js.on_success_action,
js.on_fail_action
FROM msdb.dbo.sysjobs j
JOIN msdb.dbo.sysjobsteps js ON j.job_id = js.job_id;
-- 查看作业历史
SELECT
j.name AS JobName,
jh.step_id,
jh.step_name,
jh.run_date,
jh.run_time,
jh.run_duration,
jh.sql_severity,
jh.message
FROM msdb.dbo.sysjobs j
JOIN msdb.dbo.sysjobhistory jh ON j.job_id = jh.job_id
ORDER BY jh.run_date DESC, jh.run_time DESC;
-- 执行作业
EXEC msdb.dbo.sp_start_job @job_name = 'JobName';
-- 停止作业
EXEC msdb.dbo.sp_stop_job @job_name = 'JobName';
-- 禁用作业
EXEC msdb.dbo.sp_update_job @job_name = 'JobName', @enabled = 0;
-- 启用作业
EXEC msdb.dbo.sp_update_job @job_name = 'JobName', @enabled = 1;扩展事件 (Extended Events)
查看系统定义的扩展事件会话
生产场景示例:查看SQL Server系统定义的扩展事件会话,用于监控和故障诊断。
sql
SELECT
name,
description,
is_system,
is_enabled
FROM sys.server_event_sessions;
-- 查看会话事件
SELECT
s.name AS SessionName,
e.name AS EventName,
e.description AS EventDescription
FROM sys.server_event_sessions s
JOIN sys.server_event_session_events e ON s.event_session_id = e.event_session_id;创建和使用扩展事件会话
生产场景示例:创建扩展事件会话监控死锁,用于死锁诊断和分析。
sql
-- 创建扩展事件会话监控死锁
CREATE EVENT SESSION [Deadlock_Monitor]
ON SERVER
ADD EVENT sqlserver.lock_deadlock,
ADD EVENT sqlserver.lock_deadlock_chain
ADD TARGET package0.event_file(
SET filename = 'D:\XEvents\Deadlock_Monitor.xel',
max_file_size = (5),
max_rollover_files = (10)
)
WITH (
MAX_MEMORY = 4096 KB,
EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
MAX_DISPATCH_LATENCY = 30 SECONDS,
MAX_EVENT_SIZE = 0 KB,
MEMORY_PARTITION_MODE = NONE,
TRACK_CAUSALITY = ON,
STARTUP_STATE = OFF
);
-- 启动扩展事件会话
ALTER EVENT SESSION [Deadlock_Monitor] ON SERVER STATE = START;
-- 停止扩展事件会话
ALTER EVENT SESSION [Deadlock_Monitor] ON SERVER STATE = STOP;
-- 删除扩展事件会话
DROP EVENT SESSION [Deadlock_Monitor] ON SERVER;
-- 查看扩展事件数据
SELECT
event_data = CONVERT(xml, event_data),
file_name,
file_offset
FROM sys.fn_xe_file_target_read_file(
'D:\XEvents\Deadlock_Monitor*.xel',
NULL, NULL, NULL);Performance Monitor 计数器
常用计数器
生产场景示例:使用Performance Monitor监控SQL Server性能,包括缓冲区缓存命中率、页预期寿命和批处理请求数。
| 计数器类别 | 计数器名称 | 说明 |
|---|---|---|
| SQLServer:Buffer Manager | Buffer cache hit ratio | 缓冲区缓存命中率,理想值 > 95% |
| SQLServer:Buffer Manager | Page life expectancy | 页在缓冲区中的预期寿命,理想值 > 300秒 |
| SQLServer:Buffer Manager | Page reads/sec | 每秒页读取数 |
| SQLServer:Buffer Manager | Page writes/sec | 每秒页写入数 |
| SQLServer:General Statistics | User Connections | 当前用户连接数 |
| SQLServer:SQL Statistics | Batch Requests/sec | 每秒批处理请求数 |
| SQLServer:SQL Statistics | SQL Compilations/sec | 每秒SQL编译数 |
| SQLServer:SQL Statistics | SQL Re-Compilations/sec | 每秒SQL重编译数 |
| SQLServer:Wait Statistics | Lock Wait Time (ms) | 锁等待时间(毫秒) |
| SQLServer:Wait Statistics | Latch Wait Time (ms) | 闩锁等待时间(毫秒) |
| Process | % Processor Time | SQL Server进程CPU使用率 |
| PhysicalDisk | % Disk Time | 磁盘时间百分比 |
| PhysicalDisk | Avg. Disk Queue Length | 平均磁盘队列长度 |
使用 PowerShell 收集性能计数器
生产场景示例:使用PowerShell脚本自动收集SQL Server性能计数器数据,用于性能监控和趋势分析。
powershell
# PowerShell脚本:收集SQL Server性能计数器
$CounterList = @(
"SQLServer:Buffer Manager\Buffer cache hit ratio",
"SQLServer:Buffer Manager\Page life expectancy",
"SQLServer:General Statistics\User Connections",
"SQLServer:SQL Statistics\Batch Requests/sec",
"Processor(_Total)\% Processor Time",
"PhysicalDisk(_Total)\Avg. Disk Queue Length"
)
$OutputFile = "C:\PerfMonData.csv"
$SampleInterval = 5 # 采样间隔(秒)
$SampleCount = 60 # 采样次数
Import-Module PSSQLite
# 创建性能计数器对象
$PerfCounter = New-Object System.Diagnostics.PerformanceCounterSampleSet $CounterList
# 开始收集数据
$Results = @()
for ($i = 0; $i -lt $SampleCount; $i++) {
$Sample = $PerfCounter.NextSample()
$Timestamp = Get-Date
$Row = [PSCustomObject]@{
Timestamp = $Timestamp
}
foreach ($Counter in $Sample.Counters) {
$Row | Add-Member -NotePropertyName $Counter.Path -NotePropertyValue $Counter.CookedValue
}
$Results += $Row
Start-Sleep -Seconds $SampleInterval
}
# 导出到CSV
$Results | Export-Csv -Path $OutputFile -NoTypeInformation监控脚本示例
实时监控活动会话和阻塞
生产场景示例:创建存储过程实时监控活动会话和阻塞,用于快速定位和解决阻塞问题。
sql
-- 创建实时监控活动会话和阻塞的存储过程
CREATE PROCEDURE dbo.usp_MonitorActiveSessions
AS
BEGIN
SET NOCOUNT ON;
SELECT
GETDATE() AS MonitorTime,
r.session_id,
s.login_name,
s.host_name,
s.program_name,
r.status,
r.command,
r.blocking_session_id,
r.cpu_time,
r.reads,
r.writes,
r.logical_reads,
r.row_count,
r.total_elapsed_time,
r.wait_type,
r.wait_time,
r.wait_resource,
t.text AS SqlText
FROM sys.dm_exec_requests r
JOIN sys.dm_exec_sessions s ON r.session_id = s.session_id
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
WHERE r.session_id > 50
ORDER BY r.total_elapsed_time DESC;
END;
GO监控索引碎片并生成重组/重建脚本
生产场景示例:创建存储过程监控索引碎片并生成重组/重建脚本,用于自动索引维护。
sql
-- 创建索引碎片监控和维护脚本
CREATE PROCEDURE dbo.usp_MaintainIndexes
@DatabaseName SYSNAME,
@FragmentationThresholdLow FLOAT = 5.0,
@FragmentationThresholdHigh FLOAT = 30.0
AS
BEGIN
SET NOCOUNT ON;
DECLARE @TableName SYSNAME;
DECLARE @IndexName SYSNAME;
DECLARE @Fragmentation FLOAT;
DECLARE @SQL NVARCHAR(MAX);
-- 创建临时表存储索引碎片信息
CREATE TABLE #IndexFragmentation (
DatabaseName SYSNAME,
SchemaName SYSNAME,
TableName SYSNAME,
IndexName SYSNAME,
Fragmentation FLOAT,
PageCount INT
);
-- 插入索引碎片信息
INSERT INTO #IndexFragmentation
EXEC sp_executesql N'
SELECT
DB_NAME() AS DatabaseName,
SCHEMA_NAME(o.schema_id) AS SchemaName,
o.name AS TableName,
i.name AS IndexName,
ips.avg_fragmentation_in_percent AS Fragmentation,
ips.page_count AS PageCount
FROM sys.dm_db_index_physical_stats(DB_ID(@DatabaseName), NULL, NULL, NULL, ''LIMITED'') ips
JOIN sys.indexes i ON ips.object_id = i.object_id AND ips.index_id = i.index_id
JOIN sys.objects o ON i.object_id = o.object_id
WHERE o.type = ''U'' AND i.type > 0 AND ips.page_count > 100
', N'@DatabaseName SYSNAME', @DatabaseName;
-- 生成并执行索引维护脚本
DECLARE IndexCursor CURSOR FOR
SELECT TableName, IndexName, Fragmentation
FROM #IndexFragmentation
WHERE Fragmentation >= @FragmentationThresholdLow;
OPEN IndexCursor;
FETCH NEXT FROM IndexCursor INTO @TableName, @IndexName, @Fragmentation;
WHILE @@FETCH_STATUS = 0
BEGIN
IF @Fragmentation < @FragmentationThresholdHigh
BEGIN
-- 重组索引(低碎片)
SET @SQL = N'ALTER INDEX [' + @IndexName + '] ON [' + @DatabaseName + '].[dbo].[' + @TableName + '] REORGANIZE;';
END
ELSE
BEGIN
-- 重建索引(高碎片)
SET @SQL = N'ALTER INDEX [' + @IndexName + '] ON [' + @DatabaseName + '].[dbo].[' + @TableName + '] REBUILD WITH (ONLINE = ON);';
END
PRINT @SQL;
EXEC sp_executesql @SQL;
FETCH NEXT FROM IndexCursor INTO @TableName, @IndexName, @Fragmentation;
END
CLOSE IndexCursor;
DEALLOCATE IndexCursor;
DROP TABLE #IndexFragmentation;
END;
GO监控磁盘空间使用
生产场景示例:使用PowerShell脚本监控SQL Server数据库磁盘空间,当使用率超过阈值时发送告警邮件。
powershell
# PowerShell脚本:监控SQL Server数据库磁盘空间
Import-Module SqlServer
$SqlInstance = "ServerName"
$Databases = Get-DbaDatabase -SqlInstance $SqlInstance
$AlertThreshold = 80 # 告警阈值(%)
$SmtpServer = "smtp.example.com"
$From = "alerts@example.com"
$To = "dba@example.com"
$AlertDatabases = @()
foreach ($DB in $Databases) {
if ($DB.Name -notin "master", "model", "msdb", "tempdb") {
$SpaceInfo = Get-DbaDbSpace -SqlInstance $SqlInstance -Database $DB.Name
$UsedPercent = [math]::Round(($SpaceInfo.UsedSpaceGB / $SpaceInfo.TotalSpaceGB) * 100, 2)
if ($UsedPercent -ge $AlertThreshold) {
$AlertDatabases += [PSCustomObject]@{
DatabaseName = $DB.Name
TotalSpaceGB = [math]::Round($SpaceInfo.TotalSpaceGB, 2)
UsedSpaceGB = [math]::Round($SpaceInfo.UsedSpaceGB, 2)
FreeSpaceGB = [math]::Round($SpaceInfo.FreeSpaceGB, 2)
UsedPercent = $UsedPercent
}
}
}
}
if ($AlertDatabases.Count -gt 0) {
$Subject = "告警:SQL Server 数据库磁盘空间不足"
$Body = $AlertDatabases | Format-Table -AutoSize | Out-String
Send-MailMessage -SmtpServer $SmtpServer -From $From -To $To -Subject $Subject -Body $Body
}版本差异
SQL Server 2012及以前
- 部分DMV功能有限
- 扩展事件功能相对简单
- 缺少一些高级监控功能
SQL Server 2014
- 增强了内存管理相关DMVs
- 引入了更多性能监控功能
- 扩展事件功能得到增强
SQL Server 2016
- 引入了Query Store功能
- 增强了扩展事件功能
- 增加了新的DMVs用于查询性能监控
SQL Server 2017
- 增强了智能查询处理相关功能
- 扩展事件功能更加完善
- 增加了更多性能监控和诊断功能
SQL Server 2019
- 引入了更多智能查询处理功能
- 增强了扩展事件的性能
- 增加了新的DMVs用于内存和I/O监控
SQL Server 2022
- 增强了Query Store功能
- 引入了更多云集成监控功能
- 增加了新的DMVs用于性能监控和诊断
监控最佳实践
监控策略
- 确定监控目标:明确监控的业务目标和关键指标
- 选择监控工具:根据需求选择合适的监控工具(内置工具或第三方工具)
- 设置合理的告警阈值:避免过多的误告警
- 定期审查监控数据:分析趋势,预测未来需求
- 建立监控基线:了解系统正常运行状态
- 自动化监控和告警:减少人工干预
- 记录和分析告警:持续改进监控策略
性能监控最佳实践
- 监控关键性能指标:CPU、内存、磁盘I/O、网络
- 监控等待统计信息:识别性能瓶颈
- 监控索引使用情况:优化索引设计
- 监控查询性能:识别慢查询和资源密集型查询
- 监控数据库增长:预测存储需求
- 定期进行性能基线测试:了解系统性能变化
故障诊断最佳实践
- 建立故障诊断流程:明确故障诊断步骤
- 收集完整的诊断数据:包括错误日志、性能计数器、DMV数据等
- 分析数据找出根本原因:避免只处理症状
- 测试解决方案:确保解决方案有效
- 记录故障和解决方案:建立知识库
- 定期进行故障演练:提高故障处理能力
常见问题(FAQ)
如何识别SQL Server性能瓶颈?
解答:
- 使用等待统计信息:
sys.dm_os_wait_stats可以显示SQL Server正在等待什么资源 - 监控资源使用情况:CPU、内存、磁盘I/O、网络
- 分析慢查询:使用
sys.dm_exec_query_stats查找CPU或IO密集型查询 - 检查索引使用情况:
sys.dm_db_index_usage_stats可以显示索引是否被有效使用 - 查看阻塞情况:
sys.dm_os_waiting_tasks可以显示当前阻塞的会话 - 监控缓冲区缓存命中率:理想值应 > 95%
如何查找导致CPU使用率高的查询?
解答:
sql
-- 查找CPU使用率高的查询
SELECT
TOP 10
total_worker_time / 1000 AS TotalCPUTimeMS,
execution_count,
total_worker_time / execution_count / 1000 AS AvgCPUTimeMS,
t.text AS SqlText,
qp.query_plan AS QueryPlan
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
CROSS APPLY sys.dm_exec_query_plan(plan_handle) qp
ORDER BY total_worker_time DESC;如何监控SQL Server的内存使用情况?
解答:
使用动态管理视图:
sqlSELECT type, name, SUM(pages_kb) / 1024 AS MemoryMB FROM sys.dm_os_memory_clerks GROUP BY type, name ORDER BY SUM(pages_kb) DESC;使用DBCC命令:
sqlDBCC MEMORYSTATUS;使用性能计数器:
- SQLServer:Memory Manager\Total Server Memory (KB)
- SQLServer:Memory Manager\Target Server Memory (KB)
如何监控SQL Server的磁盘I/O?
解答:
使用动态管理视图:
sqlSELECT DB_NAME(database_id) AS DatabaseName, file_id, io_stall_read_ms, num_of_reads, io_stall_write_ms, num_of_writes, size_on_disk_bytes / 1024 / 1024 AS SizeMB FROM sys.dm_io_virtual_file_stats(NULL, NULL) ORDER BY io_stall_read_ms + io_stall_write_ms DESC;使用DBCC命令:
sqlDBCC SQLPERF('IOSTATS');使用性能计数器:
- PhysicalDisk\Avg. Disk Queue Length
- PhysicalDisk% Disk Time
- PhysicalDisk\Disk Reads/sec
- PhysicalDisk\Disk Writes/sec
如何监控SQL Server的死锁?
解答:
启用死锁跟踪标志:
sqlDBCC TRACEON(1222, -1); -- 以XML格式记录死锁 DBCC TRACEON(1204, -1); -- 以文本格式记录死锁使用扩展事件:
sqlCREATE EVENT SESSION [Deadlock_Monitor] ON SERVER ADD EVENT sqlserver.lock_deadlock, ADD EVENT sqlserver.lock_deadlock_chain ADD TARGET package0.event_file( SET filename = 'D:\XEvents\Deadlock_Monitor.xel' );使用SQL Server Agent告警:
- 配置SQL Server Agent告警,当发生死锁时发送通知
如何清除SQL Server缓存?
解答:
sql
-- 清除计划缓存
DBCC FREEPROCCACHE;
-- 清除缓冲区缓存
DBCC DROPCLEANBUFFERS;
-- 清除会话缓存
DBCC FREESESSIONCACHE;
-- 清除系统缓存
DBCC FREESYSTEMCACHE('ALL');注意:清除缓存会影响性能,只应在测试环境或维护窗口使用。
如何查看SQL Server的版本和补丁级别?
解答:
sql
-- 方法1:使用 @@VERSION
SELECT @@VERSION AS SQLServerVersion;
-- 方法2:使用 SERVERPROPERTY
SELECT
SERVERPROPERTY('ProductVersion') AS ProductVersion,
SERVERPROPERTY('ProductLevel') AS ProductLevel,
SERVERPROPERTY('Edition') AS Edition,
SERVERPROPERTY('BuildClrVersion') AS BuildClrVersion;
-- 方法3:使用系统存储过程
EXEC xp_msver;如何监控SQL Server作业执行情况?
解答:
使用系统存储过程:
sqlEXEC msdb.dbo.sp_help_job; EXEC msdb.dbo.sp_help_jobhistory @job_name = 'JobName';查询系统表:
sqlSELECT j.name AS JobName, j.enabled, jh.run_date, jh.run_time, jh.run_duration, jh.sql_severity, jh.message FROM msdb.dbo.sysjobs j JOIN msdb.dbo.sysjobhistory jh ON j.job_id = jh.job_id ORDER BY jh.run_date DESC, jh.run_time DESC;使用SQL Server Agent警报:
- 配置SQL Server Agent警报,当作业失败时发送通知
如何使用DMVs监控SQL Server性能?
解答:
- 监控CPU使用率:
sys.dm_exec_query_stats - 监控内存使用:
sys.dm_os_memory_clerks,sys.dm_os_buffer_descriptors - 监控磁盘I/O:
sys.dm_io_virtual_file_stats - 监控等待统计信息:
sys.dm_os_wait_stats - 监控查询性能:
sys.dm_exec_requests,sys.dm_exec_sessions - 监控索引使用:
sys.dm_db_index_usage_stats,sys.dm_db_index_physical_stats - 监控阻塞情况:
sys.dm_os_waiting_tasks
如何设置SQL Server性能基线?
解答:
- 选择基线时间段:选择系统正常运行的时间段
- 确定基线指标:选择关键性能指标,如CPU使用率、内存使用率、磁盘I/O、批处理请求数等
- 收集基线数据:使用Performance Monitor、DMVs或第三方工具收集数据
- 分析基线数据:计算平均值、最大值、最小值等统计信息
- 建立基线文档:记录基线数据和分析结果
- 定期更新基线:当系统配置或负载发生变化时,更新基线
设置性能基线可以帮助DBA识别系统性能变化,及时发现和解决性能问题。
如何监控SQL Server的事务日志使用情况?
解答:
使用DBCC命令:
sqlDBCC SQLPERF(LOGSPACE);使用动态管理视图:
sqlSELECT db.name AS DatabaseName, mf.name AS LogicalName, mf.physical_name AS PhysicalPath, mf.size * 8 / 1024 AS SizeMB, df.log_reuse_wait_desc AS LogReuseWait FROM sys.databases db JOIN sys.master_files mf ON db.database_id = mf.database_id JOIN sys.dm_db_log_space_usage df ON db.database_id = df.database_id WHERE mf.type = 1; -- 1 表示日志文件监控日志文件增长事件:
- 可以使用扩展事件监控日志文件增长事件
如何识别和解决SQL Server阻塞问题?
解答:
识别阻塞:
sqlSELECT wt.session_id AS WaitingSessionId, wt.wait_type AS WaitType, wt.wait_duration_ms AS WaitDurationMS, wt.blocking_session_id AS BlockingSessionId, t1.text AS WaitingSqlText, t2.text AS BlockingSqlText FROM sys.dm_os_waiting_tasks wt JOIN sys.dm_exec_requests r1 ON wt.session_id = r1.session_id CROSS APPLY sys.dm_exec_sql_text(r1.sql_handle) t1 LEFT JOIN sys.dm_exec_requests r2 ON wt.blocking_session_id = r2.session_id CROSS APPLY sys.dm_exec_sql_text(r2.sql_handle) t2 WHERE wt.blocking_session_id > 0;解决阻塞:
- 优化阻塞查询的性能
- 调整查询的执行顺序
- 使用NOLOCK或READPAST提示
- 增加索引优化查询
- 考虑使用快照隔离级别
如何监控SQL Server的备份和恢复状态?
解答:
查看备份历史:
sqlSELECT bs.database_name, bs.backup_start_date, bs.backup_finish_date, bs.backup_size / 1024 / 1024 AS BackupSizeMB, bmf.physical_device_name FROM msdb.dbo.backupset bs JOIN msdb.dbo.backupmediafamily bmf ON bs.media_set_id = bmf.media_set_id ORDER BY bs.backup_start_date DESC;查看恢复进度:
sqlSELECT r.session_id, r.command, r.percent_complete, r.total_elapsed_time / 1000 AS ElapsedSeconds, r.estimated_completion_time / 1000 AS EstimatedCompletionSeconds, t.text AS SqlText FROM sys.dm_exec_requests r CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t WHERE r.command IN ('RESTORE DATABASE', 'RESTORE LOG');监控备份和恢复事件:
- 可以使用SQL Server Agent告警或扩展事件监控备份和恢复事件
如何监控SQL Server的复制状态?
解答:
查看复制代理状态:
sqlEXEC sp_replmonitorhelppublication; EXEC sp_replmonitorhelpdistributionagent;查看复制延迟:
sqlSELECT publication_id, publisher_db, publication, subscriber_db, subscriber_server, agent_name, replication_type, last_distsync, retention, distribution_latency, subscriber_latency, overall_latency FROM distribution.dbo.MSdistribution_agents;监控复制错误:
- 可以使用SQL Server Agent告警或扩展事件监控复制错误
如何监控SQL Server的Always On可用性组状态?
解答:
查看可用性组状态:
sqlSELECT ag.name AS AvailabilityGroupName, ar.replica_server_name AS ReplicaServerName, ar.availability_mode_desc AS AvailabilityMode, ar.failover_mode_desc AS FailoverMode, ars.role_desc AS Role, ars.operational_state_desc AS OperationalState, ars.synchronization_health_desc AS SynchronizationHealth FROM sys.availability_groups ag JOIN sys.availability_replicas ar ON ag.group_id = ar.group_id JOIN sys.dm_hadr_availability_replica_states ars ON ar.replica_id = ars.replica_id;查看可用性组数据库状态:
sqlSELECT ag.name AS AvailabilityGroupName, db_name(drs.database_id) AS DatabaseName, drs.synchronization_state_desc AS SynchronizationState, drs.synchronization_health_desc AS SynchronizationHealth, drs.last_hardened_lsn, drs.last_received_lsn, drs.last_redone_lsn FROM sys.availability_groups ag JOIN sys.dm_hadr_availability_replica_states ars ON ag.group_id = ars.group_id JOIN sys.dm_hadr_database_replica_states drs ON ars.replica_id = drs.replica_id;监控可用性组事件:
- 可以使用SQL Server Agent告警或扩展事件监控可用性组事件
如何监控SQL Server的内存压力?
解答:
监控页面预期寿命:
- 性能计数器:SQLServer:Buffer Manager\Page life expectancy
- 理想值 > 300秒
监控内存授予等待:
sqlSELECT wait_type, waiting_tasks_count, wait_time_ms FROM sys.dm_os_wait_stats WHERE wait_type LIKE '%MEMORY_GRANT%';监控内存 clerk 使用:
sqlSELECT type, name, SUM(pages_kb) / 1024 AS MemoryMB FROM sys.dm_os_memory_clerks GROUP BY type, name ORDER BY SUM(pages_kb) DESC;监控内存管理器指标:
sqlSELECT total_server_memory_kb / 1024 AS TotalServerMemoryMB, target_server_memory_kb / 1024 AS TargetServerMemoryMB, memory_utilization_percentage AS MemoryUtilizationPercentage, available_commit_limit_kb / 1024 AS AvailableCommitLimitMB FROM sys.dm_os_sys_memory;
当total_server_memory_kb接近target_server_memory_kb时,说明SQL Server正在使用所有分配的内存,可能存在内存压力。
