Adding VAD to whisper.cpp to reduce hallucinations on audio with silences

This commit is contained in:
Raivis Dejus 2026-03-07 05:54:35 +02:00
commit e6921a5734
3 changed files with 9 additions and 0 deletions

View file

@ -65,6 +65,7 @@ ifeq ($(OS), Windows_NT)
cp whisper.cpp/build/bin/Release/whisper-cli.exe buzz/whisper_cpp/
cp whisper.cpp/build/bin/Release/whisper-server.exe buzz/whisper_cpp/
cp dll_backup/SDL2.dll buzz/whisper_cpp
# Fetch the Silero VAD model (ggml-silero-v6.2.0.bin) into buzz\whisper_cpp, skipping the download when the file is already present.
PowerShell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin')) { Start-BitsTransfer -Source https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin -Destination 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin' }"
endif
ifeq ($(shell uname -s), Linux)
@ -82,6 +83,7 @@ ifeq ($(shell uname -s), Linux)
cp -P whisper.cpp/build/ggml/src/libggml-base.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/libggml-cpu.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so* buzz/whisper_cpp/ || true
# Fetch the Silero VAD model unless it is already present. -f makes curl fail on HTTP errors instead of saving the error page as the model; the .part + mv step keeps an interrupted download from being mistaken for a complete model on the next run.
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || { curl -fL -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin.part https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin && mv buzz/whisper_cpp/ggml-silero-v6.2.0.bin.part buzz/whisper_cpp/ggml-silero-v6.2.0.bin; }
endif
# Build on Macs
@ -101,6 +103,7 @@ endif
cp whisper.cpp/build/bin/whisper-server buzz/whisper_cpp/ || true
cp whisper.cpp/build/src/libwhisper.dylib buzz/whisper_cpp/ || true
cp whisper.cpp/build/ggml/src/libggml* buzz/whisper_cpp/ || true
# Fetch the Silero VAD model unless it is already present. -f makes curl fail on HTTP errors instead of saving the error page as the model; the .part + mv step keeps an interrupted download from being mistaken for a complete model on the next run.
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || { curl -fL -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin.part https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin && mv buzz/whisper_cpp/ggml-silero-v6.2.0.bin.part buzz/whisper_cpp/ggml-silero-v6.2.0.bin; }
endif
# Prints all the Mac developer identities used for code signing

View file

@ -109,6 +109,11 @@ class WhisperCpp:
"-f", file_to_process,
]
# Enable voice activity detection only when the Silero VAD model sits next to
# the whisper-cli binary. isfile (not exists) so that a directory with the same
# name is never handed to whisper-cli as --vad-model.
vad_model_path = os.path.join(os.path.dirname(whisper_cli_path), "ggml-silero-v6.2.0.bin")
if os.path.isfile(vad_model_path):
    cmd.extend(["--vad", "--vad-model", vad_model_path])
# Add translate flag if needed
if task.transcription_options.task == Task.TRANSLATE:
cmd.extend(["--translate"])

View file

@ -74,6 +74,7 @@
<li>Added option to import folder</li>
<li>Extra settings for live recordings</li>
<li>Update checker for Windows and Macs</li>
<li>Added voice activity detection to whisper.cpp</li>
</ul>
</description>
</release>