Fix for Windows crashes (#1387)

2026-03-14 14:45:46 +01:00 · 2026-02-20 15:47:13 +02:00 · 2026-02-20 15:47:13 +02:00 · bb546acbf9
commit bb546acbf9
parent ca8b7876fd
11 changed files with 171 additions and 74 deletions
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@ -94,6 +94,8 @@ jobs:
        run: |
          uv run make test
        shell: bash
+        env:
+          PYTHONFAULTHANDLER: "1"

      - name: Upload coverage reports to Codecov with GitHub Action
        uses: codecov/codecov-action@v4
@ -174,10 +176,10 @@ jobs:
      - name: Install dependencies
        run: uv sync

-      - uses: AnimMouse/setup-ffmpeg@v1.2.1
+      - uses: AnimMouse/setup-ffmpeg@v1
        id: setup-ffmpeg
        with:
-          version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '71' || '7.1' }}
+          version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}

      - name: Install MSVC for Windows
        run: |
--- a/.github/workflows/snapcraft.yml
+++ b/.github/workflows/snapcraft.yml
@ -24,23 +24,11 @@ jobs:
      # Ideas from https://github.com/orgs/community/discussions/25678
      - name: Remove unused build tools
        run: |
-          sudo apt-get remove -y '^llvm-.*'
-          sudo apt-get remove -y 'php.*'
          sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true
          sudo apt-get autoremove -y
          sudo apt-get clean
          python -m pip cache purge
          rm -rf /opt/hostedtoolcache || true
-      - name: Maximize build space
-        uses: easimon/maximize-build-space@master
-        with:
-          root-reserve-mb: 45000
-          swap-size-mb: 1024
-          remove-dotnet: 'true'
-          remove-android: 'true'
-          remove-haskell: 'true'
-          remove-codeql: 'true'
-          remove-docker-images: 'true'
      - name: Check available disk space
        run: |
          echo "=== Disk space ==="
--- a/buzz/recording.py
+++ b/buzz/recording.py
@ -27,9 +27,9 @@ class RecordingAmplitudeListener(QObject):
                callback=self.stream_callback,
            )
            self.stream.start()
-        except sounddevice.PortAudioError:
+        except Exception as e:
            self.stop_recording()
-            logging.exception("")
+            logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e)

    def stop_recording(self):
        if self.stream is not None:
--- a/buzz/transcriber/recording_transcriber.py
+++ b/buzz/transcriber/recording_transcriber.py
@ -302,7 +302,7 @@ class RecordingTranscriber(QObject):
                        next_text: str = result.get("text")

                        # Update initial prompt between successive recording chunks
-                        initial_prompt += next_text
+                        initial_prompt = next_text

                        logging.debug(
                            "Received next result, length = %s, time taken = %s",
@ -315,7 +315,11 @@ class RecordingTranscriber(QObject):

        except PortAudioError as exc:
            self.error.emit(str(exc))
-            logging.exception("")
+            logging.exception("PortAudio error during recording")
+            return
+        except Exception as exc:
+            logging.exception("Unexpected error during recording")
+            self.error.emit(str(exc))
            return

        self.finished.emit()
@ -361,7 +365,11 @@ class RecordingTranscriber(QObject):
        self.is_running = False
        if self.process and self.process.poll() is None:
            self.process.terminate()
-            self.process.wait(timeout=5)
+            try:
+                self.process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                self.process.kill()
+                logging.warning("Whisper server process had to be killed after timeout")

    def start_local_whisper_server(self):
        # Reduce verbose HTTP client logging from OpenAI/httpx
@ -466,4 +474,7 @@ class RecordingTranscriber(QObject):
    def __del__(self):
        if self.process and self.process.poll() is None:
            self.process.terminate()
-            self.process.wait(timeout=5)
+            try:
+                self.process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                self.process.kill()
--- a/buzz/widgets/audio_meter_widget.py
+++ b/buzz/widgets/audio_meter_widget.py
@ -76,4 +76,4 @@ class AudioMeterWidget(QWidget):
        self.current_amplitude = max(
            amplitude, self.current_amplitude * self.SMOOTHING_FACTOR
        )
-        self.repaint()
+        self.update()
--- a/buzz/widgets/recording_transcriber_widget.py
+++ b/buzz/widgets/recording_transcriber_widget.py
@ -1,6 +1,7 @@
 import os
 import re
 import enum
+import time
 import requests
 import logging
 import datetime
@ -212,7 +213,7 @@ class RecordingTranscriberWidget(QWidget):
        self.presentation_options_bar.hide()
        self.copy_actions_bar = self.create_copy_actions_bar()
        layout.addWidget(self.copy_actions_bar)  # Add at the bottom
-        self.copy_actions_bar.hide() 
+        self.copy_actions_bar.hide()

    def create_presentation_options_bar(self) -> QWidget:
        """Crete the presentation options bar widget"""
@ -296,15 +297,15 @@ class RecordingTranscriberWidget(QWidget):
        layout = QHBoxLayout(bar)
        layout.setContentsMargins(5, 5, 5, 5)
        layout.setSpacing(10)
-        
+
        layout.addStretch()  # Push button to the right
-        
+
        self.copy_transcript_button = QPushButton(_("Copy"), bar)
        self.copy_transcript_button.setToolTip(_("Copy transcription to clipboard"))
        self.copy_transcript_button.clicked.connect(self.on_copy_transcript_clicked)
        layout.addWidget(self.copy_transcript_button)
-        
-        return bar  
+
+        return bar

    def on_copy_transcript_clicked(self):
        """Handle copy transcript button click"""
@ -339,7 +340,7 @@ class RecordingTranscriberWidget(QWidget):

        self.copy_transcript_button.setText(_("Copied!"))
        QTimer.singleShot(2000, lambda: self.copy_transcript_button.setText(_("Copy")))
-                
+
    def on_show_presentation_clicked(self):
        """Handle click on 'Show in new window' button"""
        if self.presentation_window is None or not self.presentation_window.isVisible():
@ -668,6 +669,40 @@ class RecordingTranscriberWidget(QWidget):

        return text

+    @staticmethod
+    def write_to_export_file(file_path: str, content: str, mode: str = "a", retries: int = 5, delay: float = 0.2):
+        """Write content to file_path, retrying on PermissionError (e.g. Windows file locking); OSError failures are logged as warnings instead of raised."""
+        for attempt in range(retries):
+            try:
+                with open(file_path, mode, encoding='utf-8') as f:
+                    f.write(content)
+                return
+            except PermissionError:
+                if attempt < retries - 1:
+                    time.sleep(delay)
+                else:
+                    logging.warning("Export write failed after %d retries: %s", retries, file_path)
+            except OSError as e:
+                logging.warning("Export write failed: %s", e)
+                return
+
+    @staticmethod
+    def read_export_file(file_path: str, retries: int = 5, delay: float = 0.2) -> str:
+        """Read file_path, retrying on PermissionError (e.g. Windows file locking); returns an empty string if the read fails with an OSError."""
+        for attempt in range(retries):
+            try:
+                with open(file_path, "r", encoding='utf-8') as f:
+                    return f.read()
+            except PermissionError:
+                if attempt < retries - 1:
+                    time.sleep(delay)
+                else:
+                    logging.warning("Export read failed after %d retries: %s", retries, file_path)
+            except OSError as e:
+                logging.warning("Export read failed: %s", e)
+                return ""
+        return ""
+
    # Copilot magic implementation of a sliding window approach to find the longest common substring between two texts,
    # ignoring the initial differences.
    @staticmethod
@ -722,8 +757,7 @@ class RecordingTranscriberWidget(QWidget):
        text_box.moveCursor(QTextCursor.MoveOperation.End)

        if self.export_enabled and export_file:
-            with open(export_file, "w") as f:
-                f.write(merged_texts)
+            self.write_to_export_file(export_file, merged_texts, mode="w")

    def on_next_transcription(self, text: str):
        text = self.filter_text(text)
@ -742,8 +776,7 @@ class RecordingTranscriberWidget(QWidget):
            self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.End)

            if self.export_enabled and self.transcript_export_file:
-                with open(self.transcript_export_file, "a") as f:
-                    f.write(text + "\n\n")
+                self.write_to_export_file(self.transcript_export_file, text + "\n\n")

        elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
            self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@ -752,13 +785,11 @@ class RecordingTranscriberWidget(QWidget):
            self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)

            if self.export_enabled and self.transcript_export_file:
-                with open(self.transcript_export_file, "r") as f:
-                    existing_content = f.read()
-
+                existing_content = ""
+                if os.path.isfile(self.transcript_export_file):
+                    existing_content = self.read_export_file(self.transcript_export_file)
                new_content = text + "\n\n" + existing_content
-
-                with open(self.transcript_export_file, "w") as f:
-                    f.write(new_content)
+                self.write_to_export_file(self.transcript_export_file, new_content, mode="w")

        elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
            self.process_transcription_merge(text, self.transcripts, self.transcription_text_box, self.transcript_export_file)
@ -792,9 +823,8 @@ class RecordingTranscriberWidget(QWidget):
            self.translation_text_box.insertPlainText(self.strip_newlines(text))
            self.translation_text_box.moveCursor(QTextCursor.MoveOperation.End)

-            if self.export_enabled:
-                with open(self.translation_export_file, "a") as f:
-                    f.write(text + "\n\n")
+            if self.export_enabled and self.translation_export_file:
+                self.write_to_export_file(self.translation_export_file, text + "\n\n")

        elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
            self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@ -802,14 +832,12 @@ class RecordingTranscriberWidget(QWidget):
            self.translation_text_box.insertPlainText("\n\n")
            self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)

-            if self.export_enabled:
-                with open(self.translation_export_file, "r") as f:
-                    existing_content = f.read()
-
+            if self.export_enabled and self.translation_export_file:
+                existing_content = ""
+                if os.path.isfile(self.translation_export_file):
+                    existing_content = self.read_export_file(self.translation_export_file)
                new_content = text + "\n\n" + existing_content
-
-                with open(self.translation_export_file, "w") as f:
-                    f.write(new_content)
+                self.write_to_export_file(self.translation_export_file, new_content, mode="w")

        elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
            self.process_transcription_merge(text, self.translations, self.translation_text_box, self.translation_export_file)
@ -842,6 +870,7 @@ class RecordingTranscriberWidget(QWidget):

    def on_transcriber_finished(self):
        self.reset_record_button()
+        # Restart amplitude listener now that the transcription stream is closed
        self.reset_recording_amplitude_listener()

    def on_transcriber_error(self, error: str):
@ -899,6 +928,16 @@ class RecordingTranscriberWidget(QWidget):
            self.model_loader.cancel()

        self.stop_recording()
+        if self.transcription_thread is not None:
+            try:
+                if self.transcription_thread.isRunning():
+                    if not self.transcription_thread.wait(15_000):
+                        logging.warning("Transcription thread did not finish within timeout")
+            except RuntimeError:
+                # The underlying C++ QThread was already deleted via deleteLater()
+                pass
+            self.transcription_thread = None
+
        if self.recording_amplitude_listener is not None:
            self.recording_amplitude_listener.stop_recording()
            self.recording_amplitude_listener.deleteLater()
--- a/buzz/widgets/transcription_viewer/speaker_identification_widget.py
+++ b/buzz/widgets/transcription_viewer/speaker_identification_widget.py
@ -45,23 +45,6 @@ from buzz.settings.settings import Settings
 from buzz.widgets.line_edit import LineEdit
 from buzz.transcriber.transcriber import Segment

-from ctc_forced_aligner.ctc_forced_aligner import (
-    generate_emissions,
-    get_alignments,
-    get_spans,
-    load_alignment_model,
-    postprocess_results,
-    preprocess_text,
-)
-from whisper_diarization.helpers import (
-    get_realigned_ws_mapping_with_punctuation,
-    get_sentences_speaker_mapping,
-    get_words_speaker_mapping,
-    langs_to_iso,
-    punct_model_langs,
-)
-from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
-from whisper_diarization.diarization import MSDDDiarizer


 def process_in_batches(
@ -167,6 +150,32 @@ class IdentificationWorker(QObject):
        }

    def run(self):
+        try:
+            from ctc_forced_aligner.ctc_forced_aligner import (
+                generate_emissions,
+                get_alignments,
+                get_spans,
+                load_alignment_model,
+                postprocess_results,
+                preprocess_text,
+            )
+            from whisper_diarization.helpers import (
+                get_realigned_ws_mapping_with_punctuation,
+                get_sentences_speaker_mapping,
+                get_words_speaker_mapping,
+                langs_to_iso,
+                punct_model_langs,
+            )
+            from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
+            from whisper_diarization.diarization import MSDDDiarizer
+        except ImportError as e:
+            logging.exception("Failed to import speaker identification libraries: %s", e)
+            self.error.emit(
+                _("Speaker identification is not available: failed to load required libraries.")
+                + f"\n\n{e}"
+            )
+            return
+
        diarizer_model = None
        alignment_model = None

--- a/hatch_build.py
+++ b/hatch_build.py
@ -82,6 +82,42 @@ class CustomBuildHook(BuildHookInterface):
            # Build ctc_forced_aligner C++ extension in-place
            print("Building ctc_forced_aligner C++ extension...")
            ctc_aligner_dir = project_root / "ctc_forced_aligner"
+
+            # Apply local patches before building.
+            # Uses --check first to avoid touching the working tree unnecessarily,
+            # which is safer in a detached-HEAD submodule.
+            patches_dir = project_root / "patches"
+            for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
+                # Dry-run forward: succeeds only if patch is NOT yet applied.
+                check_forward = subprocess.run(
+                    ["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
+                    cwd=ctc_aligner_dir,
+                    capture_output=True,
+                    text=True,
+                )
+                if check_forward.returncode == 0:
+                    # Patch can be applied — do it for real.
+                    subprocess.run(
+                        ["git", "apply", "--ignore-whitespace", str(patch_file)],
+                        cwd=ctc_aligner_dir,
+                        check=True,
+                        capture_output=True,
+                        text=True,
+                    )
+                    print(f"Applied patch: {patch_file.name}")
+                else:
+                    # Dry-run failed — either already applied or genuinely broken.
+                    check_reverse = subprocess.run(
+                        ["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
+                        cwd=ctc_aligner_dir,
+                        capture_output=True,
+                        text=True,
+                    )
+                    if check_reverse.returncode == 0:
+                        print(f"Patch already applied (skipping): {patch_file.name}")
+                    else:
+                        print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)
+
            result = subprocess.run(
                [sys.executable, "setup.py", "build_ext", "--inplace"],
                cwd=ctc_aligner_dir,
--- a/patches/ctc_forced_aligner_windows_mutex.patch
+++ b/patches/ctc_forced_aligner_windows_mutex.patch
@ -0,0 +1,16 @@
+diff --git a/setup.py b/setup.py
+index de84a25..386f662 100644
+--- a/setup.py
+++ b/setup.py
+@@ -6,7 +6,10 @@ ext_modules = [
+     Pybind11Extension(
+         "ctc_forced_aligner.ctc_forced_aligner",
+         ["ctc_forced_aligner/forced_align_impl.cpp"],
+-        extra_compile_args=["/O2"] if sys.platform == "win32" else ["-O3"],
++        # /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR prevents MSVC runtime mutex
++        # static-initializer crash on newer GitHub Actions Windows runners.
++        # See: https://github.com/actions/runner-images/issues/10004
++        extra_compile_args=["/O2", "/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"] if sys.platform == "win32" else ["-O3"],
+     )
+ ]
+ 
--- a/tests/mock_sounddevice.py
+++ b/tests/mock_sounddevice.py
@ -1,12 +1,8 @@
 import os
-import time
-import logging
 from threading import Thread, Event
 from typing import Callable, Any
-from unittest.mock import MagicMock

 import numpy as np
-import sounddevice

 from buzz import whisper_audio

@ -103,7 +99,7 @@ class MockInputStream:

    def __init__(
        self,
-        callback: Callable[[np.ndarray, int, Any, sounddevice.CallbackFlags], None],
+        callback: Callable[[np.ndarray, int, Any, Any], None],
        *args,
        **kwargs,
    ):
@ -131,7 +127,7 @@ class MockInputStream:
            if self._stop_event.is_set():
                break
            chunk = audio[seek : seek + num_samples_in_chunk]
-            self.callback(chunk, 0, None, sounddevice.CallbackFlags())
+            self.callback(chunk, 0, None, None)
            seek += num_samples_in_chunk

            # loop back around
--- a/tests/widgets/speaker_identification_widget_test.py
+++ b/tests/widgets/speaker_identification_widget_test.py
@ -9,8 +9,8 @@ from buzz.db.entity.transcription import Transcription
 from buzz.db.entity.transcription_segment import TranscriptionSegment
 from buzz.model_loader import ModelType, WhisperModelSize
 from buzz.transcriber.transcriber import Task
-# Underlying libs do not support intel Macs
-if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
+# Underlying libs do not support intel Macs or Windows (nemo C extensions crash on Windows CI)
+if not (platform.system() == "Darwin" and platform.machine() == "x86_64") and platform.system() != "Windows":
    from buzz.widgets.transcription_viewer.speaker_identification_widget import (
        SpeakerIdentificationWidget,
        IdentificationWorker,
@ -19,8 +19,8 @@ if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
 from tests.audio import test_audio_path

@pytest.mark.skipif(
-    platform.system() == "Darwin" and platform.machine() == "x86_64",
-    reason="Skip speaker identification tests on macOS x86_64"
+    (platform.system() == "Darwin" and platform.machine() == "x86_64") or platform.system() == "Windows",
+    reason="Speaker identification dependencies (nemo/texterrors C extensions) crash on Windows and are unsupported on Intel Mac"
 )
 class TestSpeakerIdentificationWidget:
    @pytest.fixture()