mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 22:55:46 +01:00
Add voice activity detection (VAD) to whisper.cpp to reduce hallucinations on audio containing silence
This commit is contained in:
parent
981dd3a758
commit
e6921a5734
3 changed files with 9 additions and 0 deletions
3
Makefile
3
Makefile
|
|
@ -65,6 +65,7 @@ ifeq ($(OS), Windows_NT)
|
|||
cp whisper.cpp/build/bin/Release/whisper-cli.exe buzz/whisper_cpp/
|
||||
cp whisper.cpp/build/bin/Release/whisper-server.exe buzz/whisper_cpp/
|
||||
cp dll_backup/SDL2.dll buzz/whisper_cpp
|
||||
PowerShell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin')) { Start-BitsTransfer -Source https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin -Destination 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin' }"
|
||||
endif
|
||||
|
||||
ifeq ($(shell uname -s), Linux)
|
||||
|
|
@ -82,6 +83,7 @@ ifeq ($(shell uname -s), Linux)
|
|||
cp -P whisper.cpp/build/ggml/src/libggml-base.so* buzz/whisper_cpp/ || true
|
||||
cp -P whisper.cpp/build/ggml/src/libggml-cpu.so* buzz/whisper_cpp/ || true
|
||||
cp -P whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so* buzz/whisper_cpp/ || true
|
||||
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin
|
||||
endif
|
||||
|
||||
# Build on Macs
|
||||
|
|
@ -101,6 +103,7 @@ endif
|
|||
cp whisper.cpp/build/bin/whisper-server buzz/whisper_cpp/ || true
|
||||
cp whisper.cpp/build/src/libwhisper.dylib buzz/whisper_cpp/ || true
|
||||
cp whisper.cpp/build/ggml/src/libggml* buzz/whisper_cpp/ || true
|
||||
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin
|
||||
endif
|
||||
|
||||
# Prints all the Mac developer identities used for code signing
|
||||
|
|
|
|||
|
|
@ -109,6 +109,11 @@ class WhisperCpp:
|
|||
"-f", file_to_process,
|
||||
]
|
||||
|
||||
# Enable voice activity detection (VAD) if the Silero model file exists next to the whisper-cli binary
|
||||
vad_model_path = os.path.join(os.path.dirname(whisper_cli_path), "ggml-silero-v6.2.0.bin")
|
||||
if os.path.exists(vad_model_path):
|
||||
cmd.extend(["--vad", "--vad-model", vad_model_path])
|
||||
|
||||
# Add translate flag if needed
|
||||
if task.transcription_options.task == Task.TRANSLATE:
|
||||
cmd.extend(["--translate"])
|
||||
|
|
|
|||
|
|
@ -74,6 +74,7 @@
|
|||
<li>Added option to import folder</li>
|
||||
<li>Extra settings for live recordings</li>
|
||||
<li>Update checker for Windows and Macs</li>
|
||||
<li>Added voice activity detection to whisper.cpp</li>
|
||||
</ul>
|
||||
</description>
|
||||
</release>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue