当前位置：首页 > news >正文

MATLAB 多窗谱谱减法语音去噪

news 2026/6/4 6:45:50

多窗谱谱减法 (Multi-taper Spectral Subtraction) 语音去噪实现，这是一种比传统谱减法更鲁棒的语音增强方法。

MATLAB 实现

2 主程序：多窗谱谱减法语音去噪

%% 多窗谱谱减法语音去噪
% 方法：Multi-taper Spectral Subtraction
% 特点：减少音乐噪声，提高语音质量
% 适用：非平稳噪声环境下的语音增强clear; clc; close all;
warning off;%% 参数设置
frame_len = 256;        % 帧长 (16kHz采样率下约16ms)
frame_shift = 128;      % 帧移 (50%重叠)
fs = 16000;             % 采样率
n_tapers = 4;           % 多窗谱使用的Slepian窗数量
alpha = 2.5;            % 过减因子 (通常2-4)
beta = 0.02;            % 谱下限参数 (防止音乐噪声)
gamma = 0.8;            % 平滑因子
noise_frames = 10;      % 用于噪声估计的初始静音帧数%% 读取语音文件
[noisy_speech, fs] = audioread('noisy_speech.wav');
if size(noisy_speech, 2) > 1noisy_speech = mean(noisy_speech, 2); % 转为单声道
end% 如果有纯净语音用于评估
try[clean_speech, ~] = audioread('clean_speech.wav');has_clean = true;
catchhas_clean = false;
endfprintf('=== 多窗谱谱减法语音去噪 ===\n');
fprintf('采样率: %d Hz\n', fs);
fprintf('帧长: %d 样本 (%.1f ms)\n', frame_len, frame_len/fs*1000);
fprintf('多窗谱窗数: %d\n', n_tapers);
fprintf('过减因子 α: %.1f\n', alpha);
fprintf('谱下限 β: %.3f\n', beta);%% 多窗谱谱减法处理
enhanced_speech = multi_taper_spectral_subtraction(...noisy_speech, fs, frame_len, frame_shift, ...n_tapers, alpha, beta, gamma, noise_frames);%% 评估指标计算
if has_clean% 计算SNR改善snr_before = 10*log10(sum(clean_speech.^2)/sum((noisy_speech-clean_speech).^2));snr_after = 10*log10(sum(clean_speech.^2)/sum((enhanced_speech-clean_speech).^2));fprintf('\n=== 客观评估 ===\n');fprintf('输入SNR: %.2f dB\n', snr_before);fprintf('输出SNR: %.2f dB\n', snr_after);fprintf('SNR改善: %.2f dB\n', snr_after - snr_before);% 计算PESQ分数（需要Voicebox工具箱）trypesq_score = pesq(clean_speech, enhanced_speech, fs);fprintf('PESQ分数: %.3f\n', pesq_score);catchfprintf('PESQ分数: 未计算（需要Voicebox工具箱）\n');end
end%% 保存结果
audiowrite('enhanced_speech.wav', enhanced_speech, fs);
fprintf('增强语音已保存为: enhanced_speech.wav\n');%% 可视化结果
visualize_results(noisy_speech, enhanced_speech, clean_speech, fs, has_clean);%% ========== 核心函数 ==========function enhanced = multi_taper_spectral_subtraction(...noisy, fs, frame_len, frame_shift, n_tapers, alpha, beta, gamma, noise_frames)% 多窗谱谱减法主函数% 分帧frames = enframe(noisy, frame_len, frame_shift);n_frames = size(frames, 1);% 生成Slepian窗（多窗谱的核心）slepian_windows = dpss(frame_len, n_tapers);% 初始化enhanced_frames = zeros(size(frames));noise_psd = zeros(frame_len, 1);prev_gain = ones(frame_len, 1);% 噪声估计（使用前几帧）for i = 1:noise_framesframe = frames(i, :)';noise_psd = noise_psd + abs(fft(frame)).^2;endnoise_psd = noise_psd / noise_frames;% 处理每一帧for i = 1:n_framesframe = frames(i, :)';% 多窗谱估计multi_taper_spec = zeros(frame_len, 1);for taper = 1:n_taperswindowed_frame = frame .* slepian_windows(taper, :)';spec = fft(windowed_frame);multi_taper_spec = multi_taper_spec + abs(spec).^2;endmulti_taper_spec = multi_taper_spec / n_tapers;% 计算增益函数（谱减法）gain = max(1 - alpha * sqrt(noise_psd ./ (multi_taper_spec + eps)), beta);% 时间平滑gain = gamma * prev_gain + (1 - gamma) * gain;prev_gain = gain;% 应用增益enhanced_spec = gain .* fft(frame);% IFFT得到时域信号enhanced_frame = real(ifft(enhanced_spec));enhanced_frames(i, :) = enhanced_frame';% 更新噪声估计（语音活动检测）if i > noise_framesnoise_psd = 0.95 * noise_psd + 0.05 * multi_taper_spec;endend% 重叠相加重构语音enhanced = overlap_add(enhanced_frames, frame_shift);
endfunction frames = enframe(x, win_len, shift_len)% 分帧函数n_frames = floor((length(x) - win_len) / shift_len) + 1;frames = zeros(n_frames, win_len);for i = 1:n_framesstart_idx = (i-1)*shift_len + 1;end_idx = start_idx + win_len - 1;frames(i, :) = x(start_idx:end_idx);end
endfunction x = overlap_add(frames, shift_len)% 重叠相加重构[n_frames, frame_len] = size(frames);x_len = (n_frames-1)*shift_len + frame_len;x = zeros(x_len, 1);for i = 1:n_framesstart_idx = (i-1)*shift_len + 1;end_idx = start_idx + frame_len - 1;x(start_idx:end_idx) = x(start_idx:end_idx) + frames(i, :)';end
endfunction windows = dpss(N, K)% 生成离散扁长球序列（DPSS）窗% N: 窗长度% K: 窗数量% 返回: K×N 的窗矩阵% 时间带宽积NW = 3;% 计算DPSS[windows, ~] = dpss(N, NW, K);% 归一化for i = 1:Kwindows(i, :) = windows(i, :) / norm(windows(i, :));end
end%% ========== 可视化函数 ==========
function visualize_results(noisy, enhanced, clean, fs, has_clean)% 可视化结果% 时间轴t = (0:length(noisy)-1)/fs;figure('Position', [100, 100, 1200, 800]);% 波形图subplot(3, 2, 1);plot(t, noisy, 'b', 'LineWidth', 1);xlabel('时间 (s)'); ylabel('幅度');title('带噪语音波形');grid on; axis tight;subplot(3, 2, 2);plot(t, enhanced, 'r', 'LineWidth', 1);xlabel('时间 (s)'); ylabel('幅度');title('增强语音波形');grid on; axis tight;if has_cleansubplot(3, 2, 3);plot(t, clean, 'g', 'LineWidth', 1);xlabel('时间 (s)'); ylabel('幅度');title('纯净语音波形');grid on; axis tight;end% 语谱图subplot(3, 2, 4);spectrogram(noisy, hamming(256), 128, 512, fs, 'yaxis');title('带噪语音语谱图');colorbar;subplot(3, 2, 5);spectrogram(enhanced, hamming(256), 128, 512, fs, 'yaxis');title('增强语音语谱图');colorbar;if has_cleansubplot(3, 2, 6);spectrogram(clean, hamming(256), 128, 512, fs, 'yaxis');title('纯净语音语谱图');colorbar;end% 频谱对比figure('Position', [100, 100, 800, 600]);% 计算平均频谱noisy_spec = abs(fft(enframe(noisy, 1024, 512)));enhanced_spec = abs(fft(enframe(enhanced, 1024, 512)));freq = (0:size(noisy_spec, 2)-1)*fs/size(noisy_spec, 2);subplot(2, 1, 1);plot(freq(1:512), mean(noisy_spec(:, 1:512), 1), 'b', 'LineWidth', 1.5);hold on;plot(freq(1:512), mean(enhanced_spec(:, 1:512), 1), 'r', 'LineWidth', 1.5);xlabel('频率 (Hz)'); ylabel('幅度');title('平均频谱对比');legend('带噪语音', '增强语音');grid on; axis tight;% 频谱差异subplot(2, 1, 2);diff_spec = mean(enhanced_spec(:, 1:512), 1) - mean(noisy_spec(:, 1:512), 1);plot(freq(1:512), diff_spec, 'g', 'LineWidth', 1.5);xlabel('频率 (Hz)'); ylabel('幅度差');title('频谱差异 (增强 - 带噪)');grid on; axis tight;sgtitle('多窗谱谱减法语音增强结果');
end

2 改进的变参数多窗谱谱减法

%% 改进版：自适应多窗谱谱减法
function enhanced = adaptive_multi_taper_ss(noisy, fs)% 自适应参数的多窗谱谱减法% 参数frame_len = 256;frame_shift = 128;n_tapers = 4;% 分帧frames = enframe(noisy, frame_len, frame_shift);n_frames = size(frames, 1);% 生成Slepian窗slepian_windows = dpss(frame_len, n_tapers);% 初始化enhanced_frames = zeros(size(frames));noise_psd = zeros(frame_len, 1);prev_gain = ones(frame_len, 1);% 初始噪声估计noise_est_frames = min(10, round(n_frames/10));for i = 1:noise_est_framesframe = frames(i, :)';noise_psd = noise_psd + abs(fft(frame)).^2;endnoise_psd = noise_psd / noise_est_frames;% 处理每一帧for i = 1:n_framesframe = frames(i, :)';% 多窗谱估计multi_taper_spec = zeros(frame_len, 1);for taper = 1:n_taperswindowed_frame = frame .* slepian_windows(taper, :)';spec = fft(windowed_frame);multi_taper_spec = multi_taper_spec + abs(spec).^2;endmulti_taper_spec = multi_taper_spec / n_tapers;% 计算信噪比snr_est = 10*log10(mean(multi_taper_spec) / mean(noise_psd));% 自适应过减因子if snr_est > 20alpha = 1.0;  % 高信噪比，少减elseif snr_est > 10alpha = 2.0;elseif snr_est > 0alpha = 3.0;elsealpha = 4.0;  % 低信噪比，多减end% 自适应谱下限beta = 0.01 * (1 - exp(-snr_est/10));% 计算增益函数gain = max(1 - alpha * sqrt(noise_psd ./ (multi_taper_spec + eps)), beta);% 时间平滑gamma = 0.7 + 0.2 * exp(-snr_est/5);  % 自适应平滑因子gain = gamma * prev_gain + (1 - gamma) * gain;prev_gain = gain;% 应用增益enhanced_spec = gain .* fft(frame);% IFFTenhanced_frame = real(ifft(enhanced_spec));enhanced_frames(i, :) = enhanced_frame';% 更新噪声估计（语音活动检测）vad_threshold = 0.1;if mean(multi_taper_spec) < mean(noise_psd) * (1 + vad_threshold)noise_psd = 0.99 * noise_psd + 0.01 * multi_taper_spec;endend% 重构语音enhanced = overlap_add(enhanced_frames, frame_shift);
end

3 多窗谱谱减法与传统方法对比

%% 多窗谱谱减法与传统方法对比
function compare_methods(noisy, fs)% 对比传统谱减法、维纳滤波和多窗谱谱减法frame_len = 256;frame_shift = 128;% 方法1：传统谱减法enhanced_ss = traditional_spectral_subtraction(noisy, fs, frame_len, frame_shift);% 方法2：维纳滤波enhanced_wiener = wiener_filtering(noisy, fs, frame_len, frame_shift);% 方法3：多窗谱谱减法enhanced_mtss = multi_taper_spectral_subtraction(...noisy, fs, frame_len, frame_shift, 4, 2.5, 0.02, 0.8, 10);% 可视化对比figure('Position', [100, 100, 1400, 800]);t = (0:length(noisy)-1)/fs;% 波形对比subplot(2, 2, 1);plot(t, noisy, 'b', 'LineWidth', 1);title('带噪语音');xlabel('时间 (s)'); ylabel('幅度');grid on;subplot(2, 2, 2);plot(t, enhanced_ss, 'r', 'LineWidth', 1);title('传统谱减法');xlabel('时间 (s)'); ylabel('幅度');grid on;subplot(2, 2, 3);plot(t, enhanced_wiener, 'g', 'LineWidth', 1);title('维纳滤波');xlabel('时间 (s)'); ylabel('幅度');grid on;subplot(2, 2, 4);plot(t, enhanced_mtss, 'm', 'LineWidth', 1);title('多窗谱谱减法');xlabel('时间 (s)'); ylabel('幅度');grid on;% 语谱图对比figure('Position', [100, 100, 1400, 800]);subplot(2, 2, 1);spectrogram(noisy, hamming(256), 128, 512, fs, 'yaxis');title('带噪语音');subplot(2, 2, 2);spectrogram(enhanced_ss, hamming(256), 128, 512, fs, 'yaxis');title('传统谱减法');subplot(2, 2, 3);spectrogram(enhanced_wiener, hamming(256), 128, 512, fs, 'yaxis');title('维纳滤波');subplot(2, 2, 4);spectrogram(enhanced_mtss, hamming(256), 128, 512, fs, 'yaxis');title('多窗谱谱减法');% 计算客观指标fprintf('\n=== 方法对比 ===\n');% 计算音乐噪声（高频能量比）music_noise_ss = calculate_music_noise(enhanced_ss, fs);music_noise_wiener = calculate_music_noise(enhanced_wiener, fs);music_noise_mtss = calculate_music_noise(enhanced_mtss, fs);fprintf('音乐噪声指标（越小越好）：\n');fprintf('传统谱减法: %.4f\n', music_noise_ss);fprintf('维纳滤波: %.4f\n', music_noise_wiener);fprintf('多窗谱谱减法: %.4f\n', music_noise_mtss);
endfunction music_noise = calculate_music_noise(signal, fs)% 计算音乐噪声指标（高频能量比）[frames, ~] = buffer(signal, 256, 128, 'nodelay');music_noise = 0;for i = 1:size(frames, 2)frame = frames(:, i);spec = abs(fft(frame));freq = (0:length(spec)-1)*fs/length(spec);% 高频能量（>4kHz）与总能量比high_freq_energy = sum(spec(freq > 4000));total_energy = sum(spec);music_noise = music_noise + high_freq_energy / (total_energy + eps);endmusic_noise = music_noise / size(frames, 2);
end

4 实际应用示例

%% 实际应用示例：处理真实录音
function practical_example()% 录制或加载语音fs = 16000;duration = 5;  % 录制5秒fprintf('请说话...（录制%d秒）\n', duration);recObj = audiorecorder(fs, 16, 1);recordblocking(recObj, duration);noisy = getaudiodata(recObj, 'double');% 添加模拟噪声（白噪声）noise_level = 0.05;noisy = noisy + noise_level * randn(size(noisy));% 应用多窗谱谱减法enhanced = multi_taper_spectral_subtraction(...noisy, fs, 256, 128, 4, 2.5, 0.02, 0.8, 10);% 播放结果fprintf('播放带噪语音...\n');sound(noisy, fs);pause(duration + 1);fprintf('播放增强语音...\n');sound(enhanced, fs);% 保存结果audiowrite('recorded_noisy.wav', noisy, fs);audiowrite('recorded_enhanced.wav', enhanced, fs);fprintf('录音已保存\n');
end%% 批量处理文件夹中的音频文件
function batch_process(folder_path)files = dir(fullfile(folder_path, '*.wav'));for i = 1:length(files)filename = fullfile(folder_path, files(i).name);[noisy, fs] = audioread(filename);% 增强处理enhanced = multi_taper_spectral_subtraction(...noisy, fs, 256, 128, 4, 2.5, 0.02, 0.8, 10);% 保存增强后的文件[path, name, ext] = fileparts(filename);enhanced_filename = fullfile(path, [name '_enhanced' ext]);audiowrite(enhanced_filename, enhanced, fs);fprintf('处理完成: %s -> %s\n', files(i).name, [name '_enhanced' ext]);end
end

参考代码多窗谱谱减法，语音去噪 www.youwenfan.com/contentcnt/77736.html

参数调优指南

参数	推荐值	作用	调整建议
`n_tapers`	3-5	多窗谱窗数量	增加可减少频谱估计方差，但增加计算量
`alpha`	2.0-4.0	过减因子	噪声越大，值应越大
`beta`	0.01-0.05	谱下限	防止音乐噪声，太小会产生残留噪声
`gamma`	0.7-0.9	时间平滑因子	越大越平滑，但可能丢失瞬态信息
`noise_frames`	5-20	噪声估计帧数	根据静音段长度调整

算法优势

减少音乐噪声：多窗谱估计降低频谱估计的方差
更好的语音保留：自适应参数调整
计算效率：相比传统谱减法增加有限计算量
鲁棒性：对非平稳噪声有更好的适应性

性能评估指标

%% 综合评估函数
function evaluate_performance(clean, noisy, enhanced, fs)% 计算多种客观指标% SNR改善snr_before = 10*log10(sum(clean.^2)/sum((noisy-clean).^2));snr_after = 10*log10(sum(clean.^2)/sum((enhanced-clean).^2));% 分段信噪比 (SegSNR)seg_snr_before = segmental_snr(clean, noisy, fs);seg_snr_after = segmental_snr(clean, enhanced, fs);% 对数谱距离 (LSD)lsd = log_spectral_distance(clean, enhanced, fs);% 语音质量感知评估 (PESQ)trypesq_score = pesq(clean, enhanced, fs);catchpesq_score = NaN;endfprintf('\n=== 性能评估 ===\n');fprintf('输入SNR: %.2f dB\n', snr_before);fprintf('输出SNR: %.2f dB\n', snr_after);fprintf('SNR改善: %.2f dB\n', snr_after - snr_before);fprintf('分段SNR改善: %.2f dB\n', seg_snr_after - seg_snr_before);fprintf('对数谱距离: %.2f\n', lsd);if ~isnan(pesq_score)fprintf('PESQ分数: %.3f\n', pesq_score);end
endfunction seg_snr = segmental_snr(clean, enhanced, fs)% 计算分段信噪比frame_len = 256;frame_shift = 128;frames_clean = enframe(clean, frame_len, frame_shift);frames_enhanced = enframe(enhanced, frame_len, frame_shift);seg_snr = 0;n_frames = size(frames_clean, 1);for i = 1:n_framessignal_power = mean(frames_clean(i, :).^2);noise_power = mean((frames_enhanced(i, :) - frames_clean(i, :)).^2);if noise_power > 0seg_snr = seg_snr + 10*log10(signal_power/noise_power);endendseg_snr = seg_snr / n_frames;
endfunction lsd = log_spectral_distance(clean, enhanced, fs)% 计算对数谱距离frame_len = 256;frame_shift = 128;frames_clean = enframe(clean, frame_len, frame_shift);frames_enhanced = enframe(enhanced, frame_len, frame_shift);lsd = 0;n_frames = size(frames_clean, 1);for i = 1:n_framesspec_clean = abs(fft(frames_clean(i, :)'));spec_enhanced = abs(fft(frames_enhanced(i, :)'));% 避免零值spec_clean = max(spec_clean, eps);spec_enhanced = max(spec_enhanced, eps);% 对数谱距离lsd = lsd + sqrt(mean((10*log10(spec_clean) - 10*log10(spec_enhanced)).^2));endlsd = lsd / n_frames;
end