From bb546acbf985b0130ff7215e18282970bad0ef13 Mon Sep 17 00:00:00 2001
From: Raivis Dejus <raivisd@scandiweb.com>
Date: Fri, 20 Feb 2026 15:47:13 +0200
Subject: [PATCH] Fix for Windows crashes (#1387)

---
 .github/workflows/ci.yml                      |  6 +-
 .github/workflows/snapcraft.yml               | 12 ---
 buzz/recording.py                             |  4 +-
 buzz/transcriber/recording_transcriber.py     | 19 +++-
 buzz/widgets/audio_meter_widget.py            |  2 +-
 buzz/widgets/recording_transcriber_widget.py  | 91 +++++++++++++------
 .../speaker_identification_widget.py          | 43 +++++----
 hatch_build.py                                | 36 ++++++++
 .../ctc_forced_aligner_windows_mutex.patch    | 16 ++++
 tests/mock_sounddevice.py                     |  8 +-
 .../speaker_identification_widget_test.py     |  8 +-
 11 files changed, 171 insertions(+), 74 deletions(-)
 create mode 100644 patches/ctc_forced_aligner_windows_mutex.patch

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e85b2b63..43cf1cef 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -94,6 +94,8 @@ jobs:
         run: |
           uv run make test
         shell: bash
+        env:
+          PYTHONFAULTHANDLER: "1"
 
       - name: Upload coverage reports to Codecov with GitHub Action
         uses: codecov/codecov-action@v4
@@ -174,10 +176,10 @@ jobs:
       - name: Install dependencies
         run: uv sync
 
-      - uses: AnimMouse/setup-ffmpeg@v1.2.1
+      - uses: AnimMouse/setup-ffmpeg@v1
         id: setup-ffmpeg
         with:
-          version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '71' || '7.1' }}
+          version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}
 
       - name: Install MSVC for Windows
         run: |
diff --git a/.github/workflows/snapcraft.yml b/.github/workflows/snapcraft.yml
index 2224688f..a2c8c63c 100644
--- a/.github/workflows/snapcraft.yml
+++ b/.github/workflows/snapcraft.yml
@@ -24,23 +24,11 @@ jobs:
       # Ideas from https://github.com/orgs/community/discussions/25678
       - name: Remove unused build tools
         run: |
-          sudo apt-get remove -y '^llvm-.*'
-          sudo apt-get remove -y 'php.*'
           sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true
           sudo apt-get autoremove -y
           sudo apt-get clean
           python -m pip cache purge
           rm -rf /opt/hostedtoolcache || true
-      - name: Maximize build space
-        uses: easimon/maximize-build-space@master
-        with:
-          root-reserve-mb: 45000
-          swap-size-mb: 1024
-          remove-dotnet: 'true'
-          remove-android: 'true'
-          remove-haskell: 'true'
-          remove-codeql: 'true'
-          remove-docker-images: 'true'
       - name: Check available disk space
         run: |
           echo "=== Disk space ==="
diff --git a/buzz/recording.py b/buzz/recording.py
index 598fc561..db77fc52 100644
--- a/buzz/recording.py
+++ b/buzz/recording.py
@@ -27,9 +27,9 @@ class RecordingAmplitudeListener(QObject):
                 callback=self.stream_callback,
             )
             self.stream.start()
-        except sounddevice.PortAudioError:
+        except Exception as e:
             self.stop_recording()
-            logging.exception("")
+            logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e)
 
     def stop_recording(self):
         if self.stream is not None:
diff --git a/buzz/transcriber/recording_transcriber.py b/buzz/transcriber/recording_transcriber.py
index fda4d56e..dec3f3dd 100644
--- a/buzz/transcriber/recording_transcriber.py
+++ b/buzz/transcriber/recording_transcriber.py
@@ -302,7 +302,7 @@ class RecordingTranscriber(QObject):
                         next_text: str = result.get("text")
 
                         # Update initial prompt between successive recording chunks
-                        initial_prompt += next_text
+                        initial_prompt = next_text
 
                         logging.debug(
                             "Received next result, length = %s, time taken = %s",
@@ -315,7 +315,11 @@ class RecordingTranscriber(QObject):
 
         except PortAudioError as exc:
             self.error.emit(str(exc))
-            logging.exception("")
+            logging.exception("PortAudio error during recording")
+            return
+        except Exception as exc:
+            logging.exception("Unexpected error during recording")
+            self.error.emit(str(exc))
             return
 
         self.finished.emit()
@@ -361,7 +365,11 @@ class RecordingTranscriber(QObject):
         self.is_running = False
         if self.process and self.process.poll() is None:
             self.process.terminate()
-            self.process.wait(timeout=5)
+            try:
+                self.process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                self.process.kill()
+                logging.warning("Whisper server process had to be killed after timeout")
 
     def start_local_whisper_server(self):
         # Reduce verbose HTTP client logging from OpenAI/httpx
@@ -466,4 +474,7 @@ class RecordingTranscriber(QObject):
     def __del__(self):
         if self.process and self.process.poll() is None:
             self.process.terminate()
-            self.process.wait(timeout=5)
\ No newline at end of file
+            try:
+                self.process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                self.process.kill()
\ No newline at end of file
diff --git a/buzz/widgets/audio_meter_widget.py b/buzz/widgets/audio_meter_widget.py
index fff4e9d3..0329a25a 100644
--- a/buzz/widgets/audio_meter_widget.py
+++ b/buzz/widgets/audio_meter_widget.py
@@ -76,4 +76,4 @@ class AudioMeterWidget(QWidget):
         self.current_amplitude = max(
             amplitude, self.current_amplitude * self.SMOOTHING_FACTOR
         )
-        self.repaint()
+        self.update()
diff --git a/buzz/widgets/recording_transcriber_widget.py b/buzz/widgets/recording_transcriber_widget.py
index 26cb92c0..a4b82cdb 100644
--- a/buzz/widgets/recording_transcriber_widget.py
+++ b/buzz/widgets/recording_transcriber_widget.py
@@ -1,6 +1,7 @@
 import os
 import re
 import enum
+import time
 import requests
 import logging
 import datetime
@@ -212,7 +213,7 @@ class RecordingTranscriberWidget(QWidget):
         self.presentation_options_bar.hide()
         self.copy_actions_bar = self.create_copy_actions_bar()
         layout.addWidget(self.copy_actions_bar)  # Add at the bottom
-        self.copy_actions_bar.hide() 
+        self.copy_actions_bar.hide()
 
     def create_presentation_options_bar(self) -> QWidget:
         """Crete the presentation options bar widget"""
@@ -296,15 +297,15 @@ class RecordingTranscriberWidget(QWidget):
         layout = QHBoxLayout(bar)
         layout.setContentsMargins(5, 5, 5, 5)
         layout.setSpacing(10)
-        
+
         layout.addStretch()  # Push button to the right
-        
+
         self.copy_transcript_button = QPushButton(_("Copy"), bar)
         self.copy_transcript_button.setToolTip(_("Copy transcription to clipboard"))
         self.copy_transcript_button.clicked.connect(self.on_copy_transcript_clicked)
         layout.addWidget(self.copy_transcript_button)
-        
-        return bar  
+
+        return bar
 
     def on_copy_transcript_clicked(self):
         """Handle copy transcript button click"""
@@ -339,7 +340,7 @@ class RecordingTranscriberWidget(QWidget):
 
         self.copy_transcript_button.setText(_("Copied!"))
         QTimer.singleShot(2000, lambda: self.copy_transcript_button.setText(_("Copy")))
-                
+
     def on_show_presentation_clicked(self):
         """Handle click on 'Show in new window' button"""
         if self.presentation_window is None or not self.presentation_window.isVisible():
@@ -668,6 +669,40 @@ class RecordingTranscriberWidget(QWidget):
 
         return text
 
+    @staticmethod
+    def write_to_export_file(file_path: str, content: str, mode: str = "a", retries: int = 5, delay: float = 0.2):
+        """Write content to file_path as UTF-8, retrying on PermissionError (Windows file locking); OSErrors are logged, not raised."""
+        for attempt in range(retries):
+            try:
+                with open(file_path, mode, encoding='utf-8') as f:
+                    f.write(content)
+                return
+            except PermissionError:  # must precede OSError: PermissionError is a subclass of it
+                if attempt < retries - 1:
+                    time.sleep(delay)  # pause before the next attempt
+                else:
+                    logging.warning("Export write failed after %d retries: %s", retries, file_path)
+            except OSError as e:  # any other OSError: log once, no retry
+                logging.warning("Export write failed: %s", e)
+                return
+
+    @staticmethod
+    def read_export_file(file_path: str, retries: int = 5, delay: float = 0.2) -> str:
+        """Read file_path as UTF-8, retrying on PermissionError (Windows file locking); returns an empty string on failure."""
+        for attempt in range(retries):
+            try:
+                with open(file_path, "r", encoding='utf-8') as f:
+                    return f.read()
+            except PermissionError:  # must precede OSError: PermissionError is a subclass of it
+                if attempt < retries - 1:
+                    time.sleep(delay)  # pause before the next attempt
+                else:
+                    logging.warning("Export read failed after %d retries: %s", retries, file_path)
+            except OSError as e:  # any other OSError: log once, no retry
+                logging.warning("Export read failed: %s", e)
+                return ""
+        return ""  # retries exhausted (or retries <= 0); NOTE: indistinguishable from an empty file
+
     # Copilot magic implementation of a sliding window approach to find the longest common substring between two texts,
     # ignoring the initial differences.
     @staticmethod
@@ -722,8 +757,7 @@ class RecordingTranscriberWidget(QWidget):
         text_box.moveCursor(QTextCursor.MoveOperation.End)
 
         if self.export_enabled and export_file:
-            with open(export_file, "w") as f:
-                f.write(merged_texts)
+            self.write_to_export_file(export_file, merged_texts, mode="w")
 
     def on_next_transcription(self, text: str):
         text = self.filter_text(text)
@@ -742,8 +776,7 @@ class RecordingTranscriberWidget(QWidget):
             self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.End)
 
             if self.export_enabled and self.transcript_export_file:
-                with open(self.transcript_export_file, "a") as f:
-                    f.write(text + "\n\n")
+                self.write_to_export_file(self.transcript_export_file, text + "\n\n")
 
         elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
             self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@@ -752,13 +785,11 @@ class RecordingTranscriberWidget(QWidget):
             self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
 
             if self.export_enabled and self.transcript_export_file:
-                with open(self.transcript_export_file, "r") as f:
-                    existing_content = f.read()
-
+                existing_content = ""
+                if os.path.isfile(self.transcript_export_file):
+                    existing_content = self.read_export_file(self.transcript_export_file)
                 new_content = text + "\n\n" + existing_content
-
-                with open(self.transcript_export_file, "w") as f:
-                    f.write(new_content)
+                self.write_to_export_file(self.transcript_export_file, new_content, mode="w")
 
         elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
             self.process_transcription_merge(text, self.transcripts, self.transcription_text_box, self.transcript_export_file)
@@ -792,9 +823,8 @@ class RecordingTranscriberWidget(QWidget):
             self.translation_text_box.insertPlainText(self.strip_newlines(text))
             self.translation_text_box.moveCursor(QTextCursor.MoveOperation.End)
 
-            if self.export_enabled:
-                with open(self.translation_export_file, "a") as f:
-                    f.write(text + "\n\n")
+            if self.export_enabled and self.translation_export_file:
+                self.write_to_export_file(self.translation_export_file, text + "\n\n")
 
         elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
             self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@@ -802,14 +832,12 @@ class RecordingTranscriberWidget(QWidget):
             self.translation_text_box.insertPlainText("\n\n")
             self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
 
-            if self.export_enabled:
-                with open(self.translation_export_file, "r") as f:
-                    existing_content = f.read()
-
+            if self.export_enabled and self.translation_export_file:
+                existing_content = ""
+                if os.path.isfile(self.translation_export_file):
+                    existing_content = self.read_export_file(self.translation_export_file)
                 new_content = text + "\n\n" + existing_content
-
-                with open(self.translation_export_file, "w") as f:
-                    f.write(new_content)
+                self.write_to_export_file(self.translation_export_file, new_content, mode="w")
 
         elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
             self.process_transcription_merge(text, self.translations, self.translation_text_box, self.translation_export_file)
@@ -842,6 +870,7 @@ class RecordingTranscriberWidget(QWidget):
 
     def on_transcriber_finished(self):
         self.reset_record_button()
+        # Restart amplitude listener now that the transcription stream is closed
         self.reset_recording_amplitude_listener()
 
     def on_transcriber_error(self, error: str):
@@ -899,6 +928,16 @@ class RecordingTranscriberWidget(QWidget):
             self.model_loader.cancel()
 
         self.stop_recording()
+        if self.transcription_thread is not None:
+            try:
+                if self.transcription_thread.isRunning():
+                    if not self.transcription_thread.wait(15_000):
+                        logging.warning("Transcription thread did not finish within timeout")
+            except RuntimeError:
+                # The underlying C++ QThread was already deleted via deleteLater()
+                pass
+            self.transcription_thread = None
+
         if self.recording_amplitude_listener is not None:
             self.recording_amplitude_listener.stop_recording()
             self.recording_amplitude_listener.deleteLater()
diff --git a/buzz/widgets/transcription_viewer/speaker_identification_widget.py b/buzz/widgets/transcription_viewer/speaker_identification_widget.py
index c87f8b0f..94368d0e 100644
--- a/buzz/widgets/transcription_viewer/speaker_identification_widget.py
+++ b/buzz/widgets/transcription_viewer/speaker_identification_widget.py
@@ -45,23 +45,6 @@ from buzz.settings.settings import Settings
 from buzz.widgets.line_edit import LineEdit
 from buzz.transcriber.transcriber import Segment
 
-from ctc_forced_aligner.ctc_forced_aligner import (
-    generate_emissions,
-    get_alignments,
-    get_spans,
-    load_alignment_model,
-    postprocess_results,
-    preprocess_text,
-)
-from whisper_diarization.helpers import (
-    get_realigned_ws_mapping_with_punctuation,
-    get_sentences_speaker_mapping,
-    get_words_speaker_mapping,
-    langs_to_iso,
-    punct_model_langs,
-)
-from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
-from whisper_diarization.diarization import MSDDDiarizer
 
 
 def process_in_batches(
@@ -167,6 +150,32 @@ class IdentificationWorker(QObject):
         }
 
     def run(self):
+        try:
+            from ctc_forced_aligner.ctc_forced_aligner import (
+                generate_emissions,
+                get_alignments,
+                get_spans,
+                load_alignment_model,
+                postprocess_results,
+                preprocess_text,
+            )
+            from whisper_diarization.helpers import (
+                get_realigned_ws_mapping_with_punctuation,
+                get_sentences_speaker_mapping,
+                get_words_speaker_mapping,
+                langs_to_iso,
+                punct_model_langs,
+            )
+            from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
+            from whisper_diarization.diarization import MSDDDiarizer
+        except ImportError as e:
+            logging.exception("Failed to import speaker identification libraries: %s", e)
+            self.error.emit(
+                _("Speaker identification is not available: failed to load required libraries.")
+                + f"\n\n{e}"
+            )
+            return
+
         diarizer_model = None
         alignment_model = None
 
diff --git a/hatch_build.py b/hatch_build.py
index c94968fe..0aeeab4c 100644
--- a/hatch_build.py
+++ b/hatch_build.py
@@ -82,6 +82,42 @@ class CustomBuildHook(BuildHookInterface):
             # Build ctc_forced_aligner C++ extension in-place
             print("Building ctc_forced_aligner C++ extension...")
             ctc_aligner_dir = project_root / "ctc_forced_aligner"
+
+            # Apply local patches before building.
+            # Uses --check first to avoid touching the working tree unnecessarily,
+            # which is safer in a detached-HEAD submodule.
+            patches_dir = project_root / "patches"
+            for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
+                # Dry-run forward: succeeds only if patch is NOT yet applied.
+                check_forward = subprocess.run(
+                    ["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
+                    cwd=ctc_aligner_dir,
+                    capture_output=True,
+                    text=True,
+                )
+                if check_forward.returncode == 0:
+                    # Patch can be applied — do it for real.
+                    subprocess.run(
+                        ["git", "apply", "--ignore-whitespace", str(patch_file)],
+                        cwd=ctc_aligner_dir,
+                        check=True,
+                        capture_output=True,
+                        text=True,
+                    )
+                    print(f"Applied patch: {patch_file.name}")
+                else:
+                    # Dry-run failed — either already applied or genuinely broken.
+                    check_reverse = subprocess.run(
+                        ["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
+                        cwd=ctc_aligner_dir,
+                        capture_output=True,
+                        text=True,
+                    )
+                    if check_reverse.returncode == 0:
+                        print(f"Patch already applied (skipping): {patch_file.name}")
+                    else:
+                        print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)
+
             result = subprocess.run(
                 [sys.executable, "setup.py", "build_ext", "--inplace"],
                 cwd=ctc_aligner_dir,
diff --git a/patches/ctc_forced_aligner_windows_mutex.patch b/patches/ctc_forced_aligner_windows_mutex.patch
new file mode 100644
index 00000000..2940c9ab
--- /dev/null
+++ b/patches/ctc_forced_aligner_windows_mutex.patch
@@ -0,0 +1,16 @@
+diff --git a/setup.py b/setup.py
+index de84a25..386f662 100644
+--- a/setup.py
++++ b/setup.py
+@@ -6,7 +6,10 @@ ext_modules = [
+     Pybind11Extension(
+         "ctc_forced_aligner.ctc_forced_aligner",
+         ["ctc_forced_aligner/forced_align_impl.cpp"],
+-        extra_compile_args=["/O2"] if sys.platform == "win32" else ["-O3"],
++        # /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR prevents MSVC runtime mutex
++        # static-initializer crash on newer GitHub Actions Windows runners.
++        # See: https://github.com/actions/runner-images/issues/10004
++        extra_compile_args=["/O2", "/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"] if sys.platform == "win32" else ["-O3"],
+     )
+ ]
+ 
diff --git a/tests/mock_sounddevice.py b/tests/mock_sounddevice.py
index 72ca7f22..5914e21d 100644
--- a/tests/mock_sounddevice.py
+++ b/tests/mock_sounddevice.py
@@ -1,12 +1,8 @@
 import os
-import time
-import logging
 from threading import Thread, Event
 from typing import Callable, Any
-from unittest.mock import MagicMock
 
 import numpy as np
-import sounddevice
 
 from buzz import whisper_audio
 
@@ -103,7 +99,7 @@ class MockInputStream:
 
     def __init__(
         self,
-        callback: Callable[[np.ndarray, int, Any, sounddevice.CallbackFlags], None],
+        callback: Callable[[np.ndarray, int, Any, Any], None],
         *args,
         **kwargs,
     ):
@@ -131,7 +127,7 @@ class MockInputStream:
             if self._stop_event.is_set():
                 break
             chunk = audio[seek : seek + num_samples_in_chunk]
-            self.callback(chunk, 0, None, sounddevice.CallbackFlags())
+            self.callback(chunk, 0, None, None)
             seek += num_samples_in_chunk
 
             # loop back around
diff --git a/tests/widgets/speaker_identification_widget_test.py b/tests/widgets/speaker_identification_widget_test.py
index 54dc4071..5b65514d 100644
--- a/tests/widgets/speaker_identification_widget_test.py
+++ b/tests/widgets/speaker_identification_widget_test.py
@@ -9,8 +9,8 @@ from buzz.db.entity.transcription import Transcription
 from buzz.db.entity.transcription_segment import TranscriptionSegment
 from buzz.model_loader import ModelType, WhisperModelSize
 from buzz.transcriber.transcriber import Task
-# Underlying libs do not support intel Macs
-if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
+# Underlying libs do not support Intel Macs or Windows (nemo C extensions crash on Windows CI)
+if not (platform.system() == "Darwin" and platform.machine() == "x86_64") and platform.system() != "Windows":
     from buzz.widgets.transcription_viewer.speaker_identification_widget import (
         SpeakerIdentificationWidget,
         IdentificationWorker,
@@ -19,8 +19,8 @@ if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
 from tests.audio import test_audio_path
 
 @pytest.mark.skipif(
-    platform.system() == "Darwin" and platform.machine() == "x86_64",
-    reason="Skip speaker identification tests on macOS x86_64"
+    (platform.system() == "Darwin" and platform.machine() == "x86_64") or platform.system() == "Windows",
+    reason="Speaker identification dependencies (nemo/texterrors C extensions) crash on Windows and are unsupported on Intel Mac"
 )
 class TestSpeakerIdentificationWidget:
     @pytest.fixture()