From 5810ac4a2e554234231f036dc53789f18d7ac1bb Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Sun, 18 May 2025 16:33:46 +0300 Subject: [PATCH] Fix for Faster whisper (#1170) --- buzz/model_loader.py | 1 + buzz/transcriber/recording_transcriber.py | 14 +++++++++----- buzz/widgets/recording_transcriber_widget.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/buzz/model_loader.py b/buzz/model_loader.py index 316e6b3f..cc4bce93 100644 --- a/buzz/model_loader.py +++ b/buzz/model_loader.py @@ -417,6 +417,7 @@ def download_faster_whisper_model( "model.bin", # largest by size first "pytorch_model.bin", # possible alternative model filename "config.json", + "preprocessor_config.json", "tokenizer.json", "vocabulary.*", ] diff --git a/buzz/transcriber/recording_transcriber.py b/buzz/transcriber/recording_transcriber.py index e6f3aa3b..1c409e48 100644 --- a/buzz/transcriber/recording_transcriber.py +++ b/buzz/transcriber/recording_transcriber.py @@ -100,11 +100,14 @@ class RecordingTranscriber(QObject): device=device, ) + # This was commented out as it was causing issues. On the other hand some users are reporting errors without + # this. 
It is possible issues were present in older model versions without some config files and are now fixed + # Fix for large-v3 https://github.com/guillaumekln/faster-whisper/issues/547#issuecomment-1797962599 - if self.transcription_options.model.whisper_model_size == WhisperModelSize.LARGEV3: - model.feature_extractor.mel_filters = model.feature_extractor.get_mel_filters( - model.feature_extractor.sampling_rate, model.feature_extractor.n_fft, n_mels=128 - ) + # if self.transcription_options.model.whisper_model_size in {WhisperModelSize.LARGEV3, WhisperModelSize.LARGEV3TURBO}: + # model.feature_extractor.mel_filters = model.feature_extractor.get_mel_filters( + # model.feature_extractor.sampling_rate, model.feature_extractor.n_fft, n_mels=128 + # ) elif self.transcription_options.model.model_type == ModelType.OPEN_AI_WHISPER_API: custom_openai_base_url = self.settings.value( key=Settings.Key.CUSTOM_OPENAI_BASE_URL, default_value="" @@ -190,7 +193,8 @@ class RecordingTranscriber(QObject): task=self.transcription_options.task.value, temperature=self.transcription_options.temperature, initial_prompt=self.transcription_options.initial_prompt, - word_timestamps=self.transcription_options.word_level_timings, + word_timestamps=False, + without_timestamps=True, no_speech_threshold=0.4, ) result = {"text": " ".join([segment.text for segment in whisper_segments])} diff --git a/buzz/widgets/recording_transcriber_widget.py b/buzz/widgets/recording_transcriber_widget.py index b18ca3d2..ad6b2933 100644 --- a/buzz/widgets/recording_transcriber_widget.py +++ b/buzz/widgets/recording_transcriber_widget.py @@ -347,7 +347,7 @@ class RecordingTranscriberWidget(QWidget): self.on_cancel_model_progress_dialog ) - if self.model_download_progress_dialog is not None: + if self.model_download_progress_dialog is not None and total_size > 0: self.model_download_progress_dialog.set_value( fraction_completed=current_size / total_size )