Skip to content

SQLServer 监控诊断命令

动态管理视图 (DMVs) 和动态管理函数 (DMFs)

系统状态监控

生产场景示例:实时监控SQL Server服务器的基本状态,包括版本、内存使用和活动会话,用于日常健康检查。

sql
-- 查看系统基本信息
SELECT @@VERSION AS SQLServerVersion;
SELECT @@SERVERNAME AS ServerName;
SELECT DB_NAME() AS CurrentDatabase;

-- 查看服务器状态
SELECT 
    cpu_count AS LogicalCPUCount,
    hyperthread_ratio AS HyperthreadRatio,
    physical_memory_kb / 1024 AS PhysicalMemoryMB,
    virtual_memory_kb / 1024 AS VirtualMemoryMB,
    committed_kb / 1024 AS CommittedMemoryMB,
    committed_target_kb / 1024 AS CommittedTargetMB,
    sqlserver_start_time AS SQLServerStartTime
FROM sys.dm_os_sys_info;

-- 查看当前活动会话
SELECT 
    session_id,
    status,
    login_name,
    host_name,
    program_name,
    command,
    database_id,
    cpu_time,
    memory_usage,
    reads,
    writes,
    logical_reads
FROM sys.dm_exec_sessions
WHERE is_user_process = 1;

-- 查看当前请求
SELECT 
    r.session_id,
    r.status,
    r.command,
    r.cpu_time,
    r.reads,
    r.writes,
    r.logical_reads,
    r.row_count,
    r.total_elapsed_time,
    t.text AS SqlText
FROM sys.dm_exec_requests r
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
WHERE r.session_id > 50; -- 排除系统会话

性能监控

生产场景示例:监控SQL Server性能瓶颈,包括等待统计信息、CPU密集型查询和IO密集型查询,用于性能调优。

sql
-- 查看等待统计信息
SELECT 
    wait_type,
    waiting_tasks_count,
    wait_time_ms,
    max_wait_time_ms,
    signal_wait_time_ms
FROM sys.dm_os_wait_stats
ORDER BY wait_time_ms DESC;

-- 查看当前等待的任务
SELECT 
    wt.session_id,
    wt.wait_type,
    wt.wait_duration_ms,
    wt.blocking_session_id,
    wt.resource_description,
    t.text AS SqlText
FROM sys.dm_os_waiting_tasks wt
JOIN sys.dm_exec_requests r ON wt.session_id = r.session_id
CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t;

-- 查看CPU使用率
SELECT 
    TOP 10
    total_worker_time / 1000 AS TotalCPUTimeMS,
    execution_count,
    total_worker_time / execution_count / 1000 AS AvgCPUTimeMS,
    t.text AS SqlText
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
ORDER BY total_worker_time DESC;

-- 查看IO密集型查询
SELECT 
    TOP 10
    (total_logical_reads + total_logical_writes) AS TotalIOMB,
    execution_count,
    (total_logical_reads + total_logical_writes) / execution_count AS AvgIOMB,
    t.text AS SqlText
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
ORDER BY (total_logical_reads + total_logical_writes) DESC;

内存使用监控

生产场景示例:监控SQL Server内存使用情况,包括内存分配、缓冲池使用和内存授予,用于内存配置优化。

sql
-- 查看内存使用情况
SELECT 
    type,
    name,
    SUM(pages_kb) / 1024 AS MemoryMB
FROM sys.dm_os_memory_clerks
GROUP BY type, name
ORDER BY SUM(pages_kb) DESC;

-- 查看缓冲池使用情况
SELECT 
    db_name(database_id) AS DatabaseName,
    COUNT(*) * 8 / 1024 AS BufferPoolMB
FROM sys.dm_os_buffer_descriptors
GROUP BY database_id
ORDER BY BufferPoolMB DESC;

-- 查看单个数据库的缓冲池使用
SELECT 
    OBJECT_NAME(p.object_id) AS TableName,
    COUNT(*) * 8 / 1024 AS BufferPoolMB
FROM sys.dm_os_buffer_descriptors bd
JOIN sys.allocation_units au ON bd.allocation_unit_id = au.allocation_unit_id
JOIN sys.partitions p ON au.container_id = p.hobt_id
WHERE bd.database_id = DB_ID() AND p.object_id > 100
GROUP BY p.object_id
ORDER BY BufferPoolMB DESC;

-- 查看内存授予情况
SELECT 
    session_id,
    request_id,
    granted_memory_kb / 1024 AS GrantedMemoryMB,
    requested_memory_kb / 1024 AS RequestedMemoryMB,
    required_memory_kb / 1024 AS RequiredMemoryMB,
    query_cost,
    timeout_sec,
    resource_semaphore_id
FROM sys.dm_exec_query_memory_grants;

索引和查询性能

生产场景示例:监控索引使用情况、缺失索引和索引碎片,用于索引优化。

sql
-- 查看缺失索引
SELECT 
    migs.avg_total_user_cost * (migs.avg_user_impact / 100.0) * (migs.user_seeks + migs.user_scans) AS ImprovementMeasure,
    OBJECT_NAME(mid.object_id) AS TableName,
    'CREATE INDEX [IX_' + OBJECT_NAME(mid.object_id) + '_' + REPLACE(REPLACE(REPLACE(ISNULL(mid.equality_columns, ''), ', ', '_'), '[', ''), ']', '') + CASE
        WHEN mid.equality_columns IS NOT NULL AND mid.inequality_columns IS NOT NULL THEN '_' ELSE '' END + REPLACE(REPLACE(REPLACE(ISNULL(mid.inequality_columns, ''), ', ', '_'), '[', ''), ']', '') + ']' + ' ON ' + mid.statement + ' (' + ISNULL(mid.equality_columns, '') + CASE WHEN mid.equality_columns IS NOT NULL AND mid.inequality_columns IS NOT NULL THEN ', ' ELSE '' END + ISNULL(mid.inequality_columns, '') + ')' + ISNULL(' INCLUDE (' + mid.included_columns + ')', '') AS CreateIndexStatement
FROM sys.dm_db_missing_index_groups mig
JOIN sys.dm_db_missing_index_group_stats migs ON mig.index_group_handle = migs.group_handle
JOIN sys.dm_db_missing_index_details mid ON mig.index_handle = mid.index_handle
ORDER BY ImprovementMeasure DESC;

-- 查看索引使用情况
SELECT 
    OBJECT_NAME(i.object_id) AS TableName,
    i.name AS IndexName,
    i.type_desc AS IndexType,
    us.user_seeks,
    us.user_scans,
    us.user_lookups,
    us.user_updates,
    us.last_user_seek,
    us.last_user_scan,
    us.last_user_lookup,
    us.last_user_update
FROM sys.indexes i
LEFT JOIN sys.dm_db_index_usage_stats us ON i.object_id = us.object_id AND i.index_id = us.index_id AND us.database_id = DB_ID()
WHERE i.object_id > 100 -- 排除系统表
ORDER BY OBJECT_NAME(i.object_id), i.name;

-- 查看索引碎片
SELECT 
    OBJECT_NAME(object_id) AS TableName,
    name AS IndexName,
    index_type_desc AS IndexType,
    avg_fragmentation_in_percent AS FragmentationPercent,
    page_count AS PageCount
FROM sys.dm_db_index_physical_stats(
    DB_ID(), OBJECT_ID('TableName'), NULL, NULL, 'LIMITED');

系统存储过程和函数

系统信息查询

生产场景示例:查询SQL Server配置、数据库信息和表结构,用于系统管理和故障诊断。

sql
-- 查看服务器配置
EXEC sp_configure;

-- 查看高级配置
EXEC sp_configure 'show advanced options', 1;
RECONFIGURE;
EXEC sp_configure;

-- 查看数据库信息
EXEC sp_helpdb;
EXEC sp_helpdb 'DatabaseName';

-- 查看表信息
EXEC sp_help 'TableName';
EXEC sp_columns 'TableName';
EXEC sp_helpindex 'TableName';

-- 查看作业信息
EXEC msdb.dbo.sp_help_job;
EXEC msdb.dbo.sp_help_jobhistory @job_name = 'JobName';

性能相关存储过程

生产场景示例:查看当前活动进程、锁信息和对象使用情况,用于性能监控和故障诊断。

sql
-- 查看当前活动进程
EXEC sp_who;
EXEC sp_who2;
EXEC sp_who2 'active';

-- 查看当前锁信息
EXEC sp_lock;
-- SQL Server 2005+ 推荐使用动态管理视图
SELECT * FROM sys.dm_tran_locks;

-- 查看对象使用情况
EXEC sp_spaceused 'TableName';
EXEC sp_spaceused 'TableName', 'true'; -- 带更新统计

-- 查看缓存使用情况
EXEC sp_cacheinfo;

-- 清除缓存
DBCC FREEPROCCACHE;
DBCC DROPCLEANBUFFERS;

错误日志查询

生产场景示例:查询SQL Server错误日志和SQL Server Agent日志,用于故障诊断和系统监控。

sql
-- 查看错误日志
EXEC xp_readerrorlog;
EXEC xp_readerrorlog 0, 1, 'Error'; -- 搜索包含"Error"的错误日志
EXEC xp_readerrorlog 0, 1, NULL, NULL, '2023-12-01', '2023-12-31'; -- 指定日期范围

-- 查看SQL Server Agent日志
EXEC msdb.dbo.sp_readerrorlog @p1 = 1; -- 1 表示SQL Server Agent日志

-- 循环错误日志
EXEC sp_cycle_errorlog;
EXEC msdb.dbo.sp_cycle_agent_errorlog; -- 循环SQL Server Agent日志

DBCC 命令

数据库维护命令

生产场景示例:检查数据库完整性、表完整性和索引完整性,用于数据库维护和故障诊断。

sql
-- 检查数据库完整性
DBCC CHECKDB ('DatabaseName') WITH NO_INFOMSGS, ALL_ERRORMSGS;

-- 检查表完整性
DBCC CHECKTABLE ('TableName') WITH NO_INFOMSGS;

-- 检查索引完整性
DBCC CHECKINDEX ('TableName') WITH NO_INFOMSGS;

-- 检查文件组完整性
DBCC CHECKFILEGROUP ('PRIMARY') WITH NO_INFOMSGS;

-- 更新统计信息
DBCC UPDATEUSAGE ('DatabaseName');
DBCC UPDATEUSAGE ('DatabaseName', 'TableName');

性能相关命令

生产场景示例:查看事务日志空间使用情况、IO统计信息和等待统计信息,用于性能监控和故障诊断。

sql
-- 查看事务日志空间使用情况
DBCC SQLPERF(LOGSPACE);

-- 查看IO统计信息
DBCC SQLPERF('IOSTATS');

-- 查看 latch 统计信息
DBCC SQLPERF('LATCHSTATS');

-- 查看等待统计信息
DBCC SQLPERF('WAITSTATS');

-- 重置等待统计信息
DBCC SQLPERF('WAITSTATS', CLEAR);

内存相关命令

生产场景示例:查看内存状态、缓冲池使用情况和清除缓存,用于内存监控和故障诊断。

sql
-- 查看内存状态
DBCC MEMORYSTATUS;

-- 查看缓冲池使用情况
DBCC SHOWBUFFERPOOLUSAGE;

-- 清除缓冲池
DBCC DROPCLEANBUFFERS;

-- 清除计划缓存
DBCC FREEPROCCACHE;

-- 清除特定计划缓存
DBCC FREEPROCCACHE (plan_handle);

-- 清除会话缓存
DBCC FREESESSIONCACHE;

-- 清除系统缓存
DBCC FREESYSTEMCACHE ('ALL');

其他常用 DBCC 命令

生产场景示例:查看会话SQL语句、死锁信息和数据库文件大小,用于故障诊断和系统监控。

sql
-- 查看当前会话ID
DBCC INPUTBUFFER(@@SPID);

-- 查看指定会话的SQL语句
DBCC INPUTBUFFER(session_id);

-- 查看死锁信息(需要启用跟踪标志1222或1448)
DBCC TRACEON(1222, -1); -- 启用死锁跟踪
DBCC TRACESTATUS(1222, -1); -- 查看跟踪状态
DBCC TRACEOFF(1222, -1); -- 关闭死锁跟踪

-- 查看数据库文件大小和空间使用
DBCC SHOWFILESTATS;

-- 收缩数据库
DBCC SHRINKDATABASE ('DatabaseName', 10); -- 收缩到10%可用空间

-- 收缩文件
DBCC SHRINKFILE ('FileName', 500); -- 收缩到500MB

SQL Server Agent 命令

生产场景示例:管理SQL Server Agent作业,包括查看作业列表、作业步骤和作业历史,用于作业监控和管理。

sql
-- 查看作业列表
SELECT 
    job_id,
    name,
    enabled,
    description,
    date_created,
    date_modified
FROM msdb.dbo.sysjobs;

-- 查看作业步骤
SELECT 
    j.name AS JobName,
    js.step_id,
    js.step_name,
    js.subsystem,
    js.command,
    js.on_success_action,
    js.on_fail_action
FROM msdb.dbo.sysjobs j
JOIN msdb.dbo.sysjobsteps js ON j.job_id = js.job_id;

-- 查看作业历史
SELECT 
    j.name AS JobName,
    jh.step_id,
    jh.step_name,
    jh.run_date,
    jh.run_time,
    jh.run_duration,
    jh.sql_severity,
    jh.message
FROM msdb.dbo.sysjobs j
JOIN msdb.dbo.sysjobhistory jh ON j.job_id = jh.job_id
ORDER BY jh.run_date DESC, jh.run_time DESC;

-- 执行作业
EXEC msdb.dbo.sp_start_job @job_name = 'JobName';

-- 停止作业
EXEC msdb.dbo.sp_stop_job @job_name = 'JobName';

-- 禁用作业
EXEC msdb.dbo.sp_update_job @job_name = 'JobName', @enabled = 0;

-- 启用作业
EXEC msdb.dbo.sp_update_job @job_name = 'JobName', @enabled = 1;

扩展事件 (Extended Events)

查看系统定义的扩展事件会话

生产场景示例:查看SQL Server系统定义的扩展事件会话,用于监控和故障诊断。

sql
SELECT 
    name,
    description,
    is_system,
    is_enabled
FROM sys.server_event_sessions;

-- 查看会话事件
SELECT 
    s.name AS SessionName,
    e.name AS EventName,
    e.description AS EventDescription
FROM sys.server_event_sessions s
JOIN sys.server_event_session_events e ON s.event_session_id = e.event_session_id;

创建和使用扩展事件会话

生产场景示例:创建扩展事件会话监控死锁,用于死锁诊断和分析。

sql
-- 创建扩展事件会话监控死锁
CREATE EVENT SESSION [Deadlock_Monitor]
ON SERVER
ADD EVENT sqlserver.lock_deadlock,
ADD EVENT sqlserver.lock_deadlock_chain
ADD TARGET package0.event_file(
    SET filename = 'D:\XEvents\Deadlock_Monitor.xel',
        max_file_size = (5),
        max_rollover_files = (10)
)
WITH (
    MAX_MEMORY = 4096 KB,
    EVENT_RETENTION_MODE = ALLOW_SINGLE_EVENT_LOSS,
    MAX_DISPATCH_LATENCY = 30 SECONDS,
    MAX_EVENT_SIZE = 0 KB,
    MEMORY_PARTITION_MODE = NONE,
    TRACK_CAUSALITY = ON,
    STARTUP_STATE = OFF
);

-- 启动扩展事件会话
ALTER EVENT SESSION [Deadlock_Monitor] ON SERVER STATE = START;

-- 停止扩展事件会话
ALTER EVENT SESSION [Deadlock_Monitor] ON SERVER STATE = STOP;

-- 删除扩展事件会话
DROP EVENT SESSION [Deadlock_Monitor] ON SERVER;

-- 查看扩展事件数据
SELECT 
    event_data = CONVERT(xml, event_data),
    file_name,
    file_offset
FROM sys.fn_xe_file_target_read_file(
    'D:\XEvents\Deadlock_Monitor*.xel',
    NULL, NULL, NULL);

Performance Monitor 计数器

常用计数器

生产场景示例:使用Performance Monitor监控SQL Server性能,包括缓冲区缓存命中率、页预期寿命和批处理请求数。

计数器类别计数器名称说明
SQLServer:Buffer ManagerBuffer cache hit ratio缓冲区缓存命中率,理想值 > 95%
SQLServer:Buffer ManagerPage life expectancy页在缓冲区中的预期寿命,理想值 > 300秒
SQLServer:Buffer ManagerPage reads/sec每秒页读取数
SQLServer:Buffer ManagerPage writes/sec每秒页写入数
SQLServer:General StatisticsUser Connections当前用户连接数
SQLServer:SQL StatisticsBatch Requests/sec每秒批处理请求数
SQLServer:SQL StatisticsSQL Compilations/sec每秒SQL编译数
SQLServer:SQL StatisticsSQL Re-Compilations/sec每秒SQL重编译数
SQLServer:Wait StatisticsLock Wait Time (ms)锁等待时间(毫秒)
SQLServer:Wait StatisticsLatch Wait Time (ms)闩锁等待时间(毫秒)
Process% Processor TimeSQL Server进程CPU使用率
PhysicalDisk% Disk Time磁盘时间百分比
PhysicalDiskAvg. Disk Queue Length平均磁盘队列长度

使用 PowerShell 收集性能计数器

生产场景示例:使用PowerShell脚本自动收集SQL Server性能计数器数据,用于性能监控和趋势分析。

powershell
# PowerShell脚本:收集SQL Server性能计数器
$CounterList = @(
    "SQLServer:Buffer Manager\Buffer cache hit ratio",
    "SQLServer:Buffer Manager\Page life expectancy",
    "SQLServer:General Statistics\User Connections",
    "SQLServer:SQL Statistics\Batch Requests/sec",
    "Processor(_Total)\% Processor Time",
    "PhysicalDisk(_Total)\Avg. Disk Queue Length"
)

$OutputFile = "C:\PerfMonData.csv"
$SampleInterval = 5 # 采样间隔(秒)
$SampleCount = 60 # 采样次数

Import-Module PSSQLite

# 创建性能计数器对象
$PerfCounter = New-Object System.Diagnostics.PerformanceCounterSampleSet $CounterList

# 开始收集数据
$Results = @()
for ($i = 0; $i -lt $SampleCount; $i++) {
    $Sample = $PerfCounter.NextSample()
    $Timestamp = Get-Date
    
    $Row = [PSCustomObject]@{
        Timestamp = $Timestamp
    }
    
    foreach ($Counter in $Sample.Counters) {
        $Row | Add-Member -NotePropertyName $Counter.Path -NotePropertyValue $Counter.CookedValue
    }
    
    $Results += $Row
    Start-Sleep -Seconds $SampleInterval
}

# 导出到CSV
$Results | Export-Csv -Path $OutputFile -NoTypeInformation

监控脚本示例

实时监控活动会话和阻塞

生产场景示例:创建存储过程实时监控活动会话和阻塞,用于快速定位和解决阻塞问题。

sql
-- 创建实时监控活动会话和阻塞的存储过程
CREATE PROCEDURE dbo.usp_MonitorActiveSessions
AS
BEGIN
    SET NOCOUNT ON;
    
    SELECT 
        GETDATE() AS MonitorTime,
        r.session_id,
        s.login_name,
        s.host_name,
        s.program_name,
        r.status,
        r.command,
        r.blocking_session_id,
        r.cpu_time,
        r.reads,
        r.writes,
        r.logical_reads,
        r.row_count,
        r.total_elapsed_time,
        r.wait_type,
        r.wait_time,
        r.wait_resource,
        t.text AS SqlText
    FROM sys.dm_exec_requests r
    JOIN sys.dm_exec_sessions s ON r.session_id = s.session_id
    CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
    WHERE r.session_id > 50
    ORDER BY r.total_elapsed_time DESC;
END;
GO

监控索引碎片并生成重组/重建脚本

生产场景示例:创建存储过程监控索引碎片并生成重组/重建脚本,用于自动索引维护。

sql
-- 创建索引碎片监控和维护脚本
CREATE PROCEDURE dbo.usp_MaintainIndexes
    @DatabaseName SYSNAME,
    @FragmentationThresholdLow FLOAT = 5.0,
    @FragmentationThresholdHigh FLOAT = 30.0
AS
BEGIN
    SET NOCOUNT ON;
    
    DECLARE @TableName SYSNAME;
    DECLARE @IndexName SYSNAME;
    DECLARE @Fragmentation FLOAT;
    DECLARE @SQL NVARCHAR(MAX);
    
    -- 创建临时表存储索引碎片信息
    CREATE TABLE #IndexFragmentation (
        DatabaseName SYSNAME,
        SchemaName SYSNAME,
        TableName SYSNAME,
        IndexName SYSNAME,
        Fragmentation FLOAT,
        PageCount INT
    );
    
    -- 插入索引碎片信息
    INSERT INTO #IndexFragmentation
    EXEC sp_executesql N'
        SELECT 
            DB_NAME() AS DatabaseName,
            SCHEMA_NAME(o.schema_id) AS SchemaName,
            o.name AS TableName,
            i.name AS IndexName,
            ips.avg_fragmentation_in_percent AS Fragmentation,
            ips.page_count AS PageCount
        FROM sys.dm_db_index_physical_stats(DB_ID(@DatabaseName), NULL, NULL, NULL, ''LIMITED'') ips
        JOIN sys.indexes i ON ips.object_id = i.object_id AND ips.index_id = i.index_id
        JOIN sys.objects o ON i.object_id = o.object_id
        WHERE o.type = ''U'' AND i.type > 0 AND ips.page_count > 100
    ', N'@DatabaseName SYSNAME', @DatabaseName;
    
    -- 生成并执行索引维护脚本
    DECLARE IndexCursor CURSOR FOR
    SELECT TableName, IndexName, Fragmentation
    FROM #IndexFragmentation
    WHERE Fragmentation >= @FragmentationThresholdLow;
    
    OPEN IndexCursor;
    FETCH NEXT FROM IndexCursor INTO @TableName, @IndexName, @Fragmentation;
    
    WHILE @@FETCH_STATUS = 0
    BEGIN
        IF @Fragmentation < @FragmentationThresholdHigh
        BEGIN
            -- 重组索引(低碎片)
            SET @SQL = N'ALTER INDEX [' + @IndexName + '] ON [' + @DatabaseName + '].[dbo].[' + @TableName + '] REORGANIZE;';
        END
        ELSE
        BEGIN
            -- 重建索引(高碎片)
            SET @SQL = N'ALTER INDEX [' + @IndexName + '] ON [' + @DatabaseName + '].[dbo].[' + @TableName + '] REBUILD WITH (ONLINE = ON);';
        END
        
        PRINT @SQL;
        EXEC sp_executesql @SQL;
        
        FETCH NEXT FROM IndexCursor INTO @TableName, @IndexName, @Fragmentation;
    END
    
    CLOSE IndexCursor;
    DEALLOCATE IndexCursor;
    
    DROP TABLE #IndexFragmentation;
END;
GO

监控磁盘空间使用

生产场景示例:使用PowerShell脚本监控SQL Server数据库磁盘空间,当使用率超过阈值时发送告警邮件。

powershell
# PowerShell脚本:监控SQL Server数据库磁盘空间
Import-Module SqlServer

$SqlInstance = "ServerName"
$Databases = Get-DbaDatabase -SqlInstance $SqlInstance
$AlertThreshold = 80 # 告警阈值(%)
$SmtpServer = "smtp.example.com"
$From = "alerts@example.com"
$To = "dba@example.com"

$AlertDatabases = @()

foreach ($DB in $Databases) {
    if ($DB.Name -notin "master", "model", "msdb", "tempdb") {
        $SpaceInfo = Get-DbaDbSpace -SqlInstance $SqlInstance -Database $DB.Name
        $UsedPercent = [math]::Round(($SpaceInfo.UsedSpaceGB / $SpaceInfo.TotalSpaceGB) * 100, 2)
        
        if ($UsedPercent -ge $AlertThreshold) {
            $AlertDatabases += [PSCustomObject]@{
                DatabaseName = $DB.Name
                TotalSpaceGB = [math]::Round($SpaceInfo.TotalSpaceGB, 2)
                UsedSpaceGB = [math]::Round($SpaceInfo.UsedSpaceGB, 2)
                FreeSpaceGB = [math]::Round($SpaceInfo.FreeSpaceGB, 2)
                UsedPercent = $UsedPercent
            }
        }
    }
}

if ($AlertDatabases.Count -gt 0) {
    $Subject = "告警:SQL Server 数据库磁盘空间不足"
    $Body = $AlertDatabases | Format-Table -AutoSize | Out-String
    Send-MailMessage -SmtpServer $SmtpServer -From $From -To $To -Subject $Subject -Body $Body
}

版本差异

SQL Server 2012及以前

  • 部分DMV功能有限
  • 扩展事件功能相对简单
  • 缺少一些高级监控功能

SQL Server 2014

  • 增强了内存管理相关DMVs
  • 引入了更多性能监控功能
  • 扩展事件功能得到增强

SQL Server 2016

  • 引入了Query Store功能
  • 增强了扩展事件功能
  • 增加了新的DMVs用于查询性能监控

SQL Server 2017

  • 增强了智能查询处理相关功能
  • 扩展事件功能更加完善
  • 增加了更多性能监控和诊断功能

SQL Server 2019

  • 引入了更多智能查询处理功能
  • 增强了扩展事件的性能
  • 增加了新的DMVs用于内存和I/O监控

SQL Server 2022

  • 增强了Query Store功能
  • 引入了更多云集成监控功能
  • 增加了新的DMVs用于性能监控和诊断

监控最佳实践

监控策略

  1. 确定监控目标:明确监控的业务目标和关键指标
  2. 选择监控工具:根据需求选择合适的监控工具(内置工具或第三方工具)
  3. 设置合理的告警阈值:避免过多的误告警
  4. 定期审查监控数据:分析趋势,预测未来需求
  5. 建立监控基线:了解系统正常运行状态
  6. 自动化监控和告警:减少人工干预
  7. 记录和分析告警:持续改进监控策略

性能监控最佳实践

  1. 监控关键性能指标:CPU、内存、磁盘I/O、网络
  2. 监控等待统计信息:识别性能瓶颈
  3. 监控索引使用情况:优化索引设计
  4. 监控查询性能:识别慢查询和资源密集型查询
  5. 监控数据库增长:预测存储需求
  6. 定期进行性能基线测试:了解系统性能变化

故障诊断最佳实践

  1. 建立故障诊断流程:明确故障诊断步骤
  2. 收集完整的诊断数据:包括错误日志、性能计数器、DMV数据等
  3. 分析数据找出根本原因:避免只处理症状
  4. 测试解决方案:确保解决方案有效
  5. 记录故障和解决方案:建立知识库
  6. 定期进行故障演练:提高故障处理能力

常见问题(FAQ)

如何识别SQL Server性能瓶颈?

解答

  1. 使用等待统计信息sys.dm_os_wait_stats 可以显示SQL Server正在等待什么资源
  2. 监控资源使用情况:CPU、内存、磁盘I/O、网络
  3. 分析慢查询:使用 sys.dm_exec_query_stats 查找CPU或IO密集型查询
  4. 检查索引使用情况sys.dm_db_index_usage_stats 可以显示索引是否被有效使用
  5. 查看阻塞情况sys.dm_os_waiting_tasks 可以显示当前阻塞的会话
  6. 监控缓冲区缓存命中率:理想值应 > 95%

如何查找导致CPU使用率高的查询?

解答

sql
-- 查找CPU使用率高的查询
SELECT 
    TOP 10
    total_worker_time / 1000 AS TotalCPUTimeMS,
    execution_count,
    total_worker_time / execution_count / 1000 AS AvgCPUTimeMS,
    t.text AS SqlText,
    qp.query_plan AS QueryPlan
FROM sys.dm_exec_query_stats
CROSS APPLY sys.dm_exec_sql_text(sql_handle) t
CROSS APPLY sys.dm_exec_query_plan(plan_handle) qp
ORDER BY total_worker_time DESC;

如何监控SQL Server的内存使用情况?

解答

  1. 使用动态管理视图

    sql
    SELECT 
        type,
        name,
        SUM(pages_kb) / 1024 AS MemoryMB
    FROM sys.dm_os_memory_clerks
    GROUP BY type, name
    ORDER BY SUM(pages_kb) DESC;
  2. 使用DBCC命令

    sql
    DBCC MEMORYSTATUS;
  3. 使用性能计数器

    • SQLServer:Memory Manager\Total Server Memory (KB)
    • SQLServer:Memory Manager\Target Server Memory (KB)

如何监控SQL Server的磁盘I/O?

解答

  1. 使用动态管理视图

    sql
    SELECT 
        DB_NAME(database_id) AS DatabaseName,
        file_id,
        io_stall_read_ms,
        num_of_reads,
        io_stall_write_ms,
        num_of_writes,
        size_on_disk_bytes / 1024 / 1024 AS SizeMB
    FROM sys.dm_io_virtual_file_stats(NULL, NULL)
    ORDER BY io_stall_read_ms + io_stall_write_ms DESC;
  2. 使用DBCC命令

    sql
    DBCC SQLPERF('IOSTATS');
  3. 使用性能计数器

    • PhysicalDisk\Avg. Disk Queue Length
    • PhysicalDisk% Disk Time
    • PhysicalDisk\Disk Reads/sec
    • PhysicalDisk\Disk Writes/sec

如何监控SQL Server的死锁?

解答

  1. 启用死锁跟踪标志

    sql
    DBCC TRACEON(1222, -1); -- 以XML格式记录死锁
    DBCC TRACEON(1204, -1); -- 以文本格式记录死锁
  2. 使用扩展事件

    sql
    CREATE EVENT SESSION [Deadlock_Monitor] ON SERVER
    ADD EVENT sqlserver.lock_deadlock,
    ADD EVENT sqlserver.lock_deadlock_chain
    ADD TARGET package0.event_file(
        SET filename = 'D:\XEvents\Deadlock_Monitor.xel'
    );
  3. 使用SQL Server Agent告警

    • 配置SQL Server Agent告警,当发生死锁时发送通知

如何清除SQL Server缓存?

解答

sql
-- 清除计划缓存
DBCC FREEPROCCACHE;

-- 清除缓冲区缓存
DBCC DROPCLEANBUFFERS;

-- 清除会话缓存
DBCC FREESESSIONCACHE;

-- 清除系统缓存
DBCC FREESYSTEMCACHE('ALL');

注意:清除缓存会影响性能,只应在测试环境或维护窗口使用。

如何查看SQL Server的版本和补丁级别?

解答

sql
-- 方法1:使用 @@VERSION
SELECT @@VERSION AS SQLServerVersion;

-- 方法2:使用 SERVERPROPERTY
SELECT 
    SERVERPROPERTY('ProductVersion') AS ProductVersion,
    SERVERPROPERTY('ProductLevel') AS ProductLevel,
    SERVERPROPERTY('Edition') AS Edition,
    SERVERPROPERTY('BuildClrVersion') AS BuildClrVersion;

-- 方法3:使用系统存储过程
EXEC xp_msver;

如何监控SQL Server作业执行情况?

解答

  1. 使用系统存储过程

    sql
    EXEC msdb.dbo.sp_help_job;
    EXEC msdb.dbo.sp_help_jobhistory @job_name = 'JobName';
  2. 查询系统表

    sql
    SELECT 
        j.name AS JobName,
        j.enabled,
        jh.run_date,
        jh.run_time,
        jh.run_duration,
        jh.sql_severity,
        jh.message
    FROM msdb.dbo.sysjobs j
    JOIN msdb.dbo.sysjobhistory jh ON j.job_id = jh.job_id
    ORDER BY jh.run_date DESC, jh.run_time DESC;
  3. 使用SQL Server Agent警报

    • 配置SQL Server Agent警报,当作业失败时发送通知

如何使用DMVs监控SQL Server性能?

解答

  1. 监控CPU使用率sys.dm_exec_query_stats
  2. 监控内存使用sys.dm_os_memory_clerks, sys.dm_os_buffer_descriptors
  3. 监控磁盘I/Osys.dm_io_virtual_file_stats
  4. 监控等待统计信息sys.dm_os_wait_stats
  5. 监控查询性能sys.dm_exec_requests, sys.dm_exec_sessions
  6. 监控索引使用sys.dm_db_index_usage_stats, sys.dm_db_index_physical_stats
  7. 监控阻塞情况sys.dm_os_waiting_tasks

如何设置SQL Server性能基线?

解答

  1. 选择基线时间段:选择系统正常运行的时间段
  2. 确定基线指标:选择关键性能指标,如CPU使用率、内存使用率、磁盘I/O、批处理请求数等
  3. 收集基线数据:使用Performance Monitor、DMVs或第三方工具收集数据
  4. 分析基线数据:计算平均值、最大值、最小值等统计信息
  5. 建立基线文档:记录基线数据和分析结果
  6. 定期更新基线:当系统配置或负载发生变化时,更新基线

设置性能基线可以帮助DBA识别系统性能变化,及时发现和解决性能问题。

如何监控SQL Server的事务日志使用情况?

解答

  1. 使用DBCC命令

    sql
    DBCC SQLPERF(LOGSPACE);
  2. 使用动态管理视图

    sql
    SELECT 
        db.name AS DatabaseName,
        mf.name AS LogicalName,
        mf.physical_name AS PhysicalPath,
        mf.size * 8 / 1024 AS SizeMB,
        df.log_reuse_wait_desc AS LogReuseWait
    FROM sys.databases db
    JOIN sys.master_files mf ON db.database_id = mf.database_id
    JOIN sys.dm_db_log_space_usage df ON db.database_id = df.database_id
    WHERE mf.type = 1; -- 1 表示日志文件
  3. 监控日志文件增长事件

    • 可以使用扩展事件监控日志文件增长事件

如何识别和解决SQL Server阻塞问题?

解答

  1. 识别阻塞

    sql
    SELECT 
        wt.session_id AS WaitingSessionId,
        wt.wait_type AS WaitType,
        wt.wait_duration_ms AS WaitDurationMS,
        wt.blocking_session_id AS BlockingSessionId,
        t1.text AS WaitingSqlText,
        t2.text AS BlockingSqlText
    FROM sys.dm_os_waiting_tasks wt
    JOIN sys.dm_exec_requests r1 ON wt.session_id = r1.session_id
    CROSS APPLY sys.dm_exec_sql_text(r1.sql_handle) t1
    LEFT JOIN sys.dm_exec_requests r2 ON wt.blocking_session_id = r2.session_id
    CROSS APPLY sys.dm_exec_sql_text(r2.sql_handle) t2
    WHERE wt.blocking_session_id > 0;
  2. 解决阻塞

    • 优化阻塞查询的性能
    • 调整查询的执行顺序
    • 使用NOLOCK或READPAST提示
    • 增加索引优化查询
    • 考虑使用快照隔离级别

如何监控SQL Server的备份和恢复状态?

解答

  1. 查看备份历史

    sql
    SELECT 
        bs.database_name,
        bs.backup_start_date,
        bs.backup_finish_date,
        bs.backup_size / 1024 / 1024 AS BackupSizeMB,
        bmf.physical_device_name
    FROM msdb.dbo.backupset bs
    JOIN msdb.dbo.backupmediafamily bmf ON bs.media_set_id = bmf.media_set_id
    ORDER BY bs.backup_start_date DESC;
  2. 查看恢复进度

    sql
    SELECT 
        r.session_id,
        r.command,
        r.percent_complete,
        r.total_elapsed_time / 1000 AS ElapsedSeconds,
        r.estimated_completion_time / 1000 AS EstimatedCompletionSeconds,
        t.text AS SqlText
    FROM sys.dm_exec_requests r
    CROSS APPLY sys.dm_exec_sql_text(r.sql_handle) t
    WHERE r.command IN ('RESTORE DATABASE', 'RESTORE LOG');
  3. 监控备份和恢复事件

    • 可以使用SQL Server Agent告警或扩展事件监控备份和恢复事件

如何监控SQL Server的复制状态?

解答

  1. 查看复制代理状态

    sql
    EXEC sp_replmonitorhelppublication;
    EXEC sp_replmonitorhelpdistributionagent;
  2. 查看复制延迟

    sql
    SELECT 
        publication_id,
        publisher_db,
        publication,
        subscriber_db,
        subscriber_server,
        agent_name,
        replication_type,
        last_distsync,
        retention,
        distribution_latency,
        subscriber_latency,
        overall_latency
    FROM distribution.dbo.MSdistribution_agents;
  3. 监控复制错误

    • 可以使用SQL Server Agent告警或扩展事件监控复制错误

如何监控SQL Server的Always On可用性组状态?

解答

  1. 查看可用性组状态

    sql
    SELECT 
        ag.name AS AvailabilityGroupName,
        ar.replica_server_name AS ReplicaServerName,
        ar.availability_mode_desc AS AvailabilityMode,
        ar.failover_mode_desc AS FailoverMode,
        ars.role_desc AS Role,
        ars.operational_state_desc AS OperationalState,
        ars.synchronization_health_desc AS SynchronizationHealth
    FROM sys.availability_groups ag
    JOIN sys.availability_replicas ar ON ag.group_id = ar.group_id
    JOIN sys.dm_hadr_availability_replica_states ars ON ar.replica_id = ars.replica_id;
  2. 查看可用性组数据库状态

    sql
    SELECT 
        ag.name AS AvailabilityGroupName,
        db_name(drs.database_id) AS DatabaseName,
        drs.synchronization_state_desc AS SynchronizationState,
        drs.synchronization_health_desc AS SynchronizationHealth,
        drs.last_hardened_lsn,
        drs.last_received_lsn,
        drs.last_redone_lsn
    FROM sys.availability_groups ag
    JOIN sys.dm_hadr_availability_replica_states ars ON ag.group_id = ars.group_id
    JOIN sys.dm_hadr_database_replica_states drs ON ars.replica_id = drs.replica_id;
  3. 监控可用性组事件

    • 可以使用SQL Server Agent告警或扩展事件监控可用性组事件

如何监控SQL Server的内存压力?

解答

  1. 监控页面预期寿命

    • 性能计数器:SQLServer:Buffer Manager\Page life expectancy
    • 理想值 > 300秒
  2. 监控内存授予等待

    sql
    SELECT 
        wait_type,
        waiting_tasks_count,
        wait_time_ms
    FROM sys.dm_os_wait_stats
    WHERE wait_type LIKE '%MEMORY_GRANT%';
  3. 监控内存 clerk 使用

    sql
    SELECT 
        type,
        name,
        SUM(pages_kb) / 1024 AS MemoryMB
    FROM sys.dm_os_memory_clerks
    GROUP BY type, name
    ORDER BY SUM(pages_kb) DESC;
  4. 监控内存管理器指标

    sql
    SELECT 
        total_server_memory_kb / 1024 AS TotalServerMemoryMB,
        target_server_memory_kb / 1024 AS TargetServerMemoryMB,
        memory_utilization_percentage AS MemoryUtilizationPercentage,
        available_commit_limit_kb / 1024 AS AvailableCommitLimitMB
    FROM sys.dm_os_sys_memory;

当total_server_memory_kb接近target_server_memory_kb时,说明SQL Server正在使用所有分配的内存,可能存在内存压力。