WIP, initial refactoring to use whisper-cli

2026-03-14 14:45:46 +01:00 · 2025-10-05 13:17:18 +03:00 · 2025-10-05 13:17:18 +03:00 · b090339160
commit b090339160
parent 44dae86f05
3 changed files with 48 additions and 23 deletions
--- a/33
+++ b/33
@ -73,28 +73,25 @@ ifeq ($(OS), Windows_NT)
 endif

 ifeq ($(shell uname -s), Linux)
-	# Build Whisper for CPU
-	-rm -rf whisper.cpp/build || true
-	-mkdir -p buzz/whisper_cpp
-	cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_RPATH='$$ORIGIN' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON
-	cmake --build whisper.cpp/build -j --config Release --verbose
-	cp whisper.cpp/build/src/libwhisper.so buzz/whisper_cpp/libwhisper.so || true
-	cp whisper.cpp/build/ggml/src/libggml.so buzz/whisper_cpp || true
-	cp whisper.cpp/build/ggml/src/libggml-base.so buzz/whisper_cpp || true
-	cp whisper.cpp/build/ggml/src/libggml-cpu.so buzz/whisper_cpp || true
-
-	# Build Whisper for Vulkan
+	# Build Whisper with Vulkan support; the rpath uses $$ORIGIN so make passes a literal dollar sign through to cmake
 	rm -rf whisper.cpp/build || true
-	-mkdir -p buzz/whisper_cpp_vulkan
-	cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_RPATH='$$ORIGIN' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DGGML_VULKAN=1
+	-mkdir -p buzz/whisper_cpp
+	cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_RPATH='$$ORIGIN' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DGGML_VULKAN=1
 	cmake --build whisper.cpp/build -j --config Release --verbose
-	cp whisper.cpp/build/src/libwhisper.so buzz/whisper_cpp_vulkan/whisper-vulkan.so || true
-	cp whisper.cpp/build/ggml/src/libggml.so buzz/whisper_cpp_vulkan || true
-	cp whisper.cpp/build/ggml/src/libggml-base.so buzz/whisper_cpp_vulkan || true
-	cp whisper.cpp/build/ggml/src/libggml-cpu.so buzz/whisper_cpp_vulkan || true
-	cp whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so buzz/whisper_cpp_vulkan || true
+	cp whisper.cpp/build/bin/whisper-cli buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/src/libwhisper.so buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/src/libwhisper.so.1 buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/src/libwhisper.so.1.7.6 buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/ggml/src/libggml.so buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/ggml/src/libggml-base.so buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/ggml/src/libggml-cpu.so buzz/whisper_cpp/ || true
+	cp whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so buzz/whisper_cpp/ || true
 endif

+# TODO
+# TODO
+# TODO
+# TODO
 # Build on Macs
 ifeq ($(shell uname -s), Darwin)
 	-rm -rf whisper.cpp/build || true
--- a/buzz/file_transcriber_queue_worker.py
+++ b/buzz/file_transcriber_queue_worker.py
@ -15,6 +15,9 @@ from buzz.transcriber.openai_whisper_api_file_transcriber import (
    OpenAIWhisperAPIFileTranscriber,
 )
 from buzz.transcriber.transcriber import FileTranscriptionTask, Segment
+# TODO Remove unused import and class
+# TODO Remove unused import and class
+# TODO Remove unused import and class
 from buzz.transcriber.whisper_cpp_file_transcriber import WhisperCppFileTranscriber
 from buzz.transcriber.whisper_file_transcriber import WhisperFileTranscriber

@ -86,14 +89,13 @@ class FileTranscriberQueueWorker(QObject):
        logging.debug("Starting next transcription task")

        model_type = self.current_task.transcription_options.model.model_type
-        if model_type == ModelType.WHISPER_CPP:
-            self.current_transcriber = WhisperCppFileTranscriber(task=self.current_task)
-        elif model_type == ModelType.OPEN_AI_WHISPER_API:
+        if model_type == ModelType.OPEN_AI_WHISPER_API:
            self.current_transcriber = OpenAIWhisperAPIFileTranscriber(
                task=self.current_task
            )
        elif (
-            model_type == ModelType.HUGGING_FACE
+            model_type == ModelType.WHISPER_CPP
+            or model_type == ModelType.HUGGING_FACE
            or model_type == ModelType.WHISPER
            or model_type == ModelType.FASTER_WHISPER
        ):
--- a/buzz/transcriber/whisper_file_transcriber.py
+++ b/buzz/transcriber/whisper_file_transcriber.py
@ -101,7 +101,9 @@ class WhisperFileTranscriber(FileTranscriber):
        cls, stderr_conn: Connection, task: FileTranscriptionTask
    ) -> None:
        with pipe_stderr(stderr_conn):
-            if task.transcription_options.model.model_type == ModelType.HUGGING_FACE:
+            if task.transcription_options.model.model_type == ModelType.WHISPER_CPP:
+                segments = cls.transcribe_whisper_cpp(task)
+            elif task.transcription_options.model.model_type == ModelType.HUGGING_FACE:
                sys.stderr.write("0%\n")
                segments = cls.transcribe_hugging_face(task)
                sys.stderr.write("100%\n")
@ -120,6 +122,30 @@ class WhisperFileTranscriber(FileTranscriber):
            sys.stderr.write(f"segments = {segments_json}\n")
            sys.stderr.write(WhisperFileTranscriber.READ_LINE_THREAD_STOP_TOKEN + "\n")

+    @classmethod
+    def transcribe_whisper_cpp(cls, task: FileTranscriptionTask) -> List[Segment]:  # Transcribe task.file_path and return the result as a list of Segment objects.
+        model = TransformersWhisper(task.model_path)  # NOTE(review): same TransformersWhisper class that transcribe_hugging_face uses; nothing here invokes whisper-cli yet - confirm this is a WIP placeholder
+        language = (
+            task.transcription_options.language
+            if task.transcription_options.language is not None
+            else "en"  # falls back to "en" when the task has no language set
+        )
+        result = model.transcribe(
+            audio=task.file_path,
+            language=language,
+            task=task.transcription_options.task.value,
+            word_timestamps=task.transcription_options.word_level_timings,
+        )
+        return [
+            Segment(
+                start=int(segment.get("start") * 1000),  # scaled by 1000 and truncated to int; presumably seconds -> milliseconds, TODO confirm
+                end=int(segment.get("end") * 1000),
+                text=segment.get("text"),
+                translation=""  # this path always leaves the translation empty
+            )
+            for segment in result.get("segments")
+        ]
+
    @classmethod
    def transcribe_hugging_face(cls, task: FileTranscriptionTask) -> List[Segment]:
        model = TransformersWhisper(task.model_path)