From e6921a5734c7e46662887ef94de43e0204557b11 Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Sat, 7 Mar 2026 05:54:35 +0200 Subject: [PATCH] Adding VAD to whisper.cpp to reduce hallucinations on audio with silences --- Makefile | 3 +++ buzz/transcriber/whisper_cpp.py | 5 +++++ share/metainfo/io.github.chidiwilliams.Buzz.metainfo.xml | 1 + 3 files changed, 9 insertions(+) diff --git a/Makefile b/Makefile index 6a316b53..4beb3323 100644 --- a/Makefile +++ b/Makefile @@ -65,6 +65,7 @@ ifeq ($(OS), Windows_NT) cp whisper.cpp/build/bin/Release/whisper-cli.exe buzz/whisper_cpp/ cp whisper.cpp/build/bin/Release/whisper-server.exe buzz/whisper_cpp/ cp dll_backup/SDL2.dll buzz/whisper_cpp + PowerShell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin')) { Start-BitsTransfer -Source https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin -Destination 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin' }" endif ifeq ($(shell uname -s), Linux) @@ -82,6 +83,7 @@ ifeq ($(shell uname -s), Linux) cp -P whisper.cpp/build/ggml/src/libggml-base.so* buzz/whisper_cpp/ || true cp -P whisper.cpp/build/ggml/src/libggml-cpu.so* buzz/whisper_cpp/ || true cp -P whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so* buzz/whisper_cpp/ || true + test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin endif # Build on Macs @@ -101,6 +103,7 @@ endif cp whisper.cpp/build/bin/whisper-server buzz/whisper_cpp/ || true cp whisper.cpp/build/src/libwhisper.dylib buzz/whisper_cpp/ || true cp whisper.cpp/build/ggml/src/libggml* buzz/whisper_cpp/ || true + test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin endif # Prints all the Mac developer identities used for code signing diff 
--git a/buzz/transcriber/whisper_cpp.py b/buzz/transcriber/whisper_cpp.py index 977b785e..db4aff84 100644 --- a/buzz/transcriber/whisper_cpp.py +++ b/buzz/transcriber/whisper_cpp.py @@ -109,6 +109,11 @@ class WhisperCpp: "-f", file_to_process, ] + # Add VAD if the model is available + vad_model_path = os.path.join(os.path.dirname(whisper_cli_path), "ggml-silero-v6.2.0.bin") + if os.path.exists(vad_model_path): + cmd.extend(["--vad", "--vad-model", vad_model_path]) + # Add translate flag if needed if task.transcription_options.task == Task.TRANSLATE: cmd.extend(["--translate"]) diff --git a/share/metainfo/io.github.chidiwilliams.Buzz.metainfo.xml b/share/metainfo/io.github.chidiwilliams.Buzz.metainfo.xml index cc91b618..fd50fb15 100644 --- a/share/metainfo/io.github.chidiwilliams.Buzz.metainfo.xml +++ b/share/metainfo/io.github.chidiwilliams.Buzz.metainfo.xml @@ -74,6 +74,7 @@
  • Added option to import folder
  • Extra settings for live recordings
  • Update checker for Windows and Macs
  • + Added voice activity detection to whisper.cpp