From fb93be429649526332aa8ab53f0384ae55e3ddfc Mon Sep 17 00:00:00 2001 From: Chidi Williams Date: Tue, 4 Jul 2023 23:42:41 +0100 Subject: [PATCH] Fix Faster Whisper large transcription (#524) --- buzz/model_loader.py | 11 +++++++---- buzz/transcriber.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/buzz/model_loader.py b/buzz/model_loader.py index e5be8ca5..d699d1b7 100644 --- a/buzz/model_loader.py +++ b/buzz/model_loader.py @@ -24,6 +24,11 @@ class WhisperModelSize(enum.Enum): MEDIUM = 'medium' LARGE = 'large' + def to_faster_whisper_model_size(self) -> str: + if self == WhisperModelSize.LARGE: + return "large-v2" + return self.value + class ModelType(enum.Enum): WHISPER = 'Whisper' @@ -159,10 +164,8 @@ class ModelDownloader(QRunnable): return super().close() if self.model.model_type == ModelType.FASTER_WHISPER: - model_size = self.model.whisper_model_size.value \ - if self.model.whisper_model_size != WhisperModelSize.LARGE \ - else "large-v2" - model_path = download_faster_whisper_model(size=model_size, tqdm_class=_tqdm) + model_path = download_faster_whisper_model( + size=self.model.whisper_model_size.to_faster_whisper_model_size(), tqdm_class=_tqdm) self.signals.finished.emit(model_path) return diff --git a/buzz/transcriber.py b/buzz/transcriber.py index 1b962843..e22a64aa 100644 --- a/buzz/transcriber.py +++ b/buzz/transcriber.py @@ -332,7 +332,7 @@ class WhisperFileTranscriber(FileTranscriber): @classmethod def transcribe_faster_whisper(cls, task: FileTranscriptionTask) -> List[Segment]: model = faster_whisper.WhisperModel( - model_size_or_path=task.transcription_options.model.whisper_model_size.value) + model_size_or_path=task.transcription_options.model.whisper_model_size.to_faster_whisper_model_size()) whisper_segments, info = model.transcribe(audio=task.file_path, language=task.transcription_options.language, task=task.transcription_options.task.value,