From bb546acbf985b0130ff7215e18282970bad0ef13 Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Fri, 20 Feb 2026 15:47:13 +0200 Subject: [PATCH] Fix for windows crashes (#1387) --- .github/workflows/ci.yml | 6 +- .github/workflows/snapcraft.yml | 12 --- buzz/recording.py | 4 +- buzz/transcriber/recording_transcriber.py | 19 +++- buzz/widgets/audio_meter_widget.py | 2 +- buzz/widgets/recording_transcriber_widget.py | 91 +++++++++++++------ .../speaker_identification_widget.py | 43 +++++---- hatch_build.py | 36 ++++++++ .../ctc_forced_aligner_windows_mutex.patch | 16 ++++ tests/mock_sounddevice.py | 8 +- .../speaker_identification_widget_test.py | 8 +- 11 files changed, 171 insertions(+), 74 deletions(-) create mode 100644 patches/ctc_forced_aligner_windows_mutex.patch diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e85b2b63..43cf1cef 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,6 +94,8 @@ jobs: run: | uv run make test shell: bash + env: + PYTHONFAULTHANDLER: "1" - name: Upload coverage reports to Codecov with GitHub Action uses: codecov/codecov-action@v4 @@ -174,10 +176,10 @@ jobs: - name: Install dependencies run: uv sync - - uses: AnimMouse/setup-ffmpeg@v1.2.1 + - uses: AnimMouse/setup-ffmpeg@v1 id: setup-ffmpeg with: - version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '71' || '7.1' }} + version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }} - name: Install MSVC for Windows run: | diff --git a/.github/workflows/snapcraft.yml b/.github/workflows/snapcraft.yml index 2224688f..a2c8c63c 100644 --- a/.github/workflows/snapcraft.yml +++ b/.github/workflows/snapcraft.yml @@ -24,23 +24,11 @@ jobs: # Ideas from https://github.com/orgs/community/discussions/25678 - name: Remove unused build tools run: | - sudo apt-get remove -y '^llvm-.*' - sudo apt-get remove -y 'php.*' sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true sudo apt-get autoremove -y sudo apt-get clean python -m pip cache purge rm -rf /opt/hostedtoolcache || true - - name: Maximize build space - uses: easimon/maximize-build-space@master - with: - root-reserve-mb: 45000 - swap-size-mb: 1024 - remove-dotnet: 'true' - remove-android: 'true' - remove-haskell: 'true' - remove-codeql: 'true' - remove-docker-images: 'true' - name: Check available disk space run: | echo "=== Disk space ===" diff --git a/buzz/recording.py b/buzz/recording.py index 598fc561..db77fc52 100644 --- a/buzz/recording.py +++ b/buzz/recording.py @@ -27,9 +27,9 @@ class RecordingAmplitudeListener(QObject): callback=self.stream_callback, ) self.stream.start() - except sounddevice.PortAudioError: + except Exception as e: self.stop_recording() - logging.exception("") + logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e) def stop_recording(self): if self.stream is not None: diff --git a/buzz/transcriber/recording_transcriber.py b/buzz/transcriber/recording_transcriber.py index fda4d56e..dec3f3dd 100644 --- a/buzz/transcriber/recording_transcriber.py +++ b/buzz/transcriber/recording_transcriber.py @@ -302,7 +302,7 @@ class RecordingTranscriber(QObject): next_text: str = result.get("text") # Update initial prompt between successive recording chunks - initial_prompt += next_text + initial_prompt = next_text logging.debug( "Received next result, length = %s, time taken = %s", @@ -315,7 +315,11 @@ class RecordingTranscriber(QObject): except PortAudioError as exc: self.error.emit(str(exc)) - logging.exception("") + logging.exception("PortAudio error during recording") + return + except Exception as exc: + logging.exception("Unexpected error during recording") + self.error.emit(str(exc)) return self.finished.emit() @@ -361,7 +365,11 @@ class RecordingTranscriber(QObject): self.is_running = False if self.process and self.process.poll() is None: self.process.terminate() - self.process.wait(timeout=5) + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.process.kill() + logging.warning("Whisper server process had to be killed after timeout") def start_local_whisper_server(self): # Reduce verbose HTTP client logging from OpenAI/httpx @@ -466,4 +474,7 @@ class RecordingTranscriber(QObject): def __del__(self): if self.process and self.process.poll() is None: self.process.terminate() - self.process.wait(timeout=5) \ No newline at end of file + try: + self.process.wait(timeout=5) + except subprocess.TimeoutExpired: + self.process.kill() \ No newline at end of file diff --git a/buzz/widgets/audio_meter_widget.py b/buzz/widgets/audio_meter_widget.py index fff4e9d3..0329a25a 100644 --- a/buzz/widgets/audio_meter_widget.py +++ b/buzz/widgets/audio_meter_widget.py @@ -76,4 +76,4 @@ class AudioMeterWidget(QWidget): self.current_amplitude = max( amplitude, self.current_amplitude * self.SMOOTHING_FACTOR ) - self.repaint() + self.update() diff --git a/buzz/widgets/recording_transcriber_widget.py b/buzz/widgets/recording_transcriber_widget.py index 26cb92c0..a4b82cdb 100644 --- a/buzz/widgets/recording_transcriber_widget.py +++ b/buzz/widgets/recording_transcriber_widget.py @@ -1,6 +1,7 @@ import os import re import enum +import time import requests import logging import datetime @@ -212,7 +213,7 @@ class RecordingTranscriberWidget(QWidget): self.presentation_options_bar.hide() self.copy_actions_bar = self.create_copy_actions_bar() layout.addWidget(self.copy_actions_bar) # Add at the bottom - self.copy_actions_bar.hide() + self.copy_actions_bar.hide() def create_presentation_options_bar(self) -> QWidget: """Crete the presentation options bar widget""" @@ -296,15 +297,15 @@ class RecordingTranscriberWidget(QWidget): layout = QHBoxLayout(bar) layout.setContentsMargins(5, 5, 5, 5) layout.setSpacing(10) - + layout.addStretch() # Push button to the right - + self.copy_transcript_button = QPushButton(_("Copy"), bar) self.copy_transcript_button.setToolTip(_("Copy transcription to clipboard")) self.copy_transcript_button.clicked.connect(self.on_copy_transcript_clicked) layout.addWidget(self.copy_transcript_button) - - return bar + + return bar def on_copy_transcript_clicked(self): """Handle copy transcript button click""" @@ -339,7 +340,7 @@ class RecordingTranscriberWidget(QWidget): self.copy_transcript_button.setText(_("Copied!")) QTimer.singleShot(2000, lambda: self.copy_transcript_button.setText(_("Copy"))) - + def on_show_presentation_clicked(self): """Handle click on 'Show in new window' button""" if self.presentation_window is None or not self.presentation_window.isVisible(): @@ -668,6 +669,40 @@ class RecordingTranscriberWidget(QWidget): return text + @staticmethod + def write_to_export_file(file_path: str, content: str, mode: str = "a", retries: int = 5, delay: float = 0.2): + """Write to an export file with retry logic for Windows file locking.""" + for attempt in range(retries): + try: + with open(file_path, mode, encoding='utf-8') as f: + f.write(content) + return + except PermissionError: + if attempt < retries - 1: + time.sleep(delay) + else: + logging.warning("Export write failed after %d retries: %s", retries, file_path) + except OSError as e: + logging.warning("Export write failed: %s", e) + return + + @staticmethod + def read_export_file(file_path: str, retries: int = 5, delay: float = 0.2) -> str: + """Read an export file with retry logic for Windows file locking.""" + for attempt in range(retries): + try: + with open(file_path, "r", encoding='utf-8') as f: + return f.read() + except PermissionError: + if attempt < retries - 1: + time.sleep(delay) + else: + logging.warning("Export read failed after %d retries: %s", retries, file_path) + except OSError as e: + logging.warning("Export read failed: %s", e) + return "" + return "" + # Copilot magic implementation of a sliding window approach to find the longest common substring between two texts, # ignoring the initial differences. @staticmethod @@ -722,8 +757,7 @@ class RecordingTranscriberWidget(QWidget): text_box.moveCursor(QTextCursor.MoveOperation.End) if self.export_enabled and export_file: - with open(export_file, "w") as f: - f.write(merged_texts) + self.write_to_export_file(export_file, merged_texts, mode="w") def on_next_transcription(self, text: str): text = self.filter_text(text) @@ -742,8 +776,7 @@ class RecordingTranscriberWidget(QWidget): self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.End) if self.export_enabled and self.transcript_export_file: - with open(self.transcript_export_file, "a") as f: - f.write(text + "\n\n") + self.write_to_export_file(self.transcript_export_file, text + "\n\n") elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE: self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start) @@ -752,13 +785,11 @@ class RecordingTranscriberWidget(QWidget): self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start) if self.export_enabled and self.transcript_export_file: - with open(self.transcript_export_file, "r") as f: - existing_content = f.read() - + existing_content = "" + if os.path.isfile(self.transcript_export_file): + existing_content = self.read_export_file(self.transcript_export_file) new_content = text + "\n\n" + existing_content - - with open(self.transcript_export_file, "w") as f: - f.write(new_content) + self.write_to_export_file(self.transcript_export_file, new_content, mode="w") elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT: self.process_transcription_merge(text, self.transcripts, self.transcription_text_box, self.transcript_export_file) @@ -792,9 +823,8 @@ class RecordingTranscriberWidget(QWidget): self.translation_text_box.insertPlainText(self.strip_newlines(text)) self.translation_text_box.moveCursor(QTextCursor.MoveOperation.End) - if self.export_enabled: - with open(self.translation_export_file, "a") as f: - f.write(text + "\n\n") + if self.export_enabled and self.translation_export_file: + self.write_to_export_file(self.translation_export_file, text + "\n\n") elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE: self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start) @@ -802,14 +832,12 @@ class RecordingTranscriberWidget(QWidget): self.translation_text_box.insertPlainText("\n\n") self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start) - if self.export_enabled: - with open(self.translation_export_file, "r") as f: - existing_content = f.read() - + if self.export_enabled and self.translation_export_file: + existing_content = "" + if os.path.isfile(self.translation_export_file): + existing_content = self.read_export_file(self.translation_export_file) new_content = text + "\n\n" + existing_content - - with open(self.translation_export_file, "w") as f: - f.write(new_content) + self.write_to_export_file(self.translation_export_file, new_content, mode="w") elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT: self.process_transcription_merge(text, self.translations, self.translation_text_box, self.translation_export_file) @@ -842,6 +870,7 @@ class RecordingTranscriberWidget(QWidget): def on_transcriber_finished(self): self.reset_record_button() + # Restart amplitude listener now that the transcription stream is closed self.reset_recording_amplitude_listener() def on_transcriber_error(self, error: str): @@ -899,6 +928,16 @@ class RecordingTranscriberWidget(QWidget): self.model_loader.cancel() self.stop_recording() + if self.transcription_thread is not None: + try: + if self.transcription_thread.isRunning(): + if not self.transcription_thread.wait(15_000): + logging.warning("Transcription thread did not finish within timeout") + except RuntimeError: + # The underlying C++ QThread was already deleted via deleteLater() + pass + self.transcription_thread = None + if self.recording_amplitude_listener is not None: self.recording_amplitude_listener.stop_recording() self.recording_amplitude_listener.deleteLater() diff --git a/buzz/widgets/transcription_viewer/speaker_identification_widget.py b/buzz/widgets/transcription_viewer/speaker_identification_widget.py index c87f8b0f..94368d0e 100644 --- a/buzz/widgets/transcription_viewer/speaker_identification_widget.py +++ b/buzz/widgets/transcription_viewer/speaker_identification_widget.py @@ -45,23 +45,6 @@ from buzz.settings.settings import Settings from buzz.widgets.line_edit import LineEdit from buzz.transcriber.transcriber import Segment -from ctc_forced_aligner.ctc_forced_aligner import ( - generate_emissions, - get_alignments, - get_spans, - load_alignment_model, - postprocess_results, - preprocess_text, -) -from whisper_diarization.helpers import ( - get_realigned_ws_mapping_with_punctuation, - get_sentences_speaker_mapping, - get_words_speaker_mapping, - langs_to_iso, - punct_model_langs, -) -from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel -from whisper_diarization.diarization import MSDDDiarizer def process_in_batches( @@ -167,6 +150,32 @@ class IdentificationWorker(QObject): } def run(self): + try: + from ctc_forced_aligner.ctc_forced_aligner import ( + generate_emissions, + get_alignments, + get_spans, + load_alignment_model, + postprocess_results, + preprocess_text, + ) + from whisper_diarization.helpers import ( + get_realigned_ws_mapping_with_punctuation, + get_sentences_speaker_mapping, + get_words_speaker_mapping, + langs_to_iso, + punct_model_langs, + ) + from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel + from whisper_diarization.diarization import MSDDDiarizer + except ImportError as e: + logging.exception("Failed to import speaker identification libraries: %s", e) + self.error.emit( + _("Speaker identification is not available: failed to load required libraries.") + + f"\n\n{e}" + ) + return + diarizer_model = None alignment_model = None diff --git a/hatch_build.py b/hatch_build.py index c94968fe..0aeeab4c 100644 --- a/hatch_build.py +++ b/hatch_build.py @@ -82,6 +82,42 @@ class CustomBuildHook(BuildHookInterface): # Build ctc_forced_aligner C++ extension in-place print("Building ctc_forced_aligner C++ extension...") ctc_aligner_dir = project_root / "ctc_forced_aligner" + + # Apply local patches before building. + # Uses --check first to avoid touching the working tree unnecessarily, + # which is safer in a detached-HEAD submodule. + patches_dir = project_root / "patches" + for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")): + # Dry-run forward: succeeds only if patch is NOT yet applied. + check_forward = subprocess.run( + ["git", "apply", "--check", "--ignore-whitespace", str(patch_file)], + cwd=ctc_aligner_dir, + capture_output=True, + text=True, + ) + if check_forward.returncode == 0: + # Patch can be applied — do it for real. + subprocess.run( + ["git", "apply", "--ignore-whitespace", str(patch_file)], + cwd=ctc_aligner_dir, + check=True, + capture_output=True, + text=True, + ) + print(f"Applied patch: {patch_file.name}") + else: + # Dry-run failed — either already applied or genuinely broken. + check_reverse = subprocess.run( + ["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)], + cwd=ctc_aligner_dir, + capture_output=True, + text=True, + ) + if check_reverse.returncode == 0: + print(f"Patch already applied (skipping): {patch_file.name}") + else: + print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr) + result = subprocess.run( [sys.executable, "setup.py", "build_ext", "--inplace"], cwd=ctc_aligner_dir, diff --git a/patches/ctc_forced_aligner_windows_mutex.patch b/patches/ctc_forced_aligner_windows_mutex.patch new file mode 100644 index 00000000..2940c9ab --- /dev/null +++ b/patches/ctc_forced_aligner_windows_mutex.patch @@ -0,0 +1,16 @@ +diff --git a/setup.py b/setup.py +index de84a25..386f662 100644 +--- a/setup.py ++++ b/setup.py +@@ -6,7 +6,10 @@ ext_modules = [ + Pybind11Extension( + "ctc_forced_aligner.ctc_forced_aligner", + ["ctc_forced_aligner/forced_align_impl.cpp"], +- extra_compile_args=["/O2"] if sys.platform == "win32" else ["-O3"], ++ # /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR prevents MSVC runtime mutex ++ # static-initializer crash on newer GitHub Actions Windows runners. ++ # See: https://github.com/actions/runner-images/issues/10004 ++ extra_compile_args=["/O2", "/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"] if sys.platform == "win32" else ["-O3"], + ) + ] + diff --git a/tests/mock_sounddevice.py b/tests/mock_sounddevice.py index 72ca7f22..5914e21d 100644 --- a/tests/mock_sounddevice.py +++ b/tests/mock_sounddevice.py @@ -1,12 +1,8 @@ import os -import time -import logging from threading import Thread, Event from typing import Callable, Any -from unittest.mock import MagicMock import numpy as np -import sounddevice from buzz import whisper_audio @@ -103,7 +99,7 @@ class MockInputStream: def __init__( self, - callback: Callable[[np.ndarray, int, Any, sounddevice.CallbackFlags], None], + callback: Callable[[np.ndarray, int, Any, Any], None], *args, **kwargs, ): @@ -131,7 +127,7 @@ class MockInputStream: if self._stop_event.is_set(): break chunk = audio[seek : seek + num_samples_in_chunk] - self.callback(chunk, 0, None, sounddevice.CallbackFlags()) + self.callback(chunk, 0, None, None) seek += num_samples_in_chunk # loop back around diff --git a/tests/widgets/speaker_identification_widget_test.py b/tests/widgets/speaker_identification_widget_test.py index 54dc4071..5b65514d 100644 --- a/tests/widgets/speaker_identification_widget_test.py +++ b/tests/widgets/speaker_identification_widget_test.py @@ -9,8 +9,8 @@ from buzz.db.entity.transcription import Transcription from buzz.db.entity.transcription_segment import TranscriptionSegment from buzz.model_loader import ModelType, WhisperModelSize from buzz.transcriber.transcriber import Task -# Underlying libs do not support intel Macs -if not (platform.system() == "Darwin" and platform.machine() == "x86_64"): +# Underlying libs do not support intel Macs or Windows (nemo C extensions crash on Windows CI) +if not (platform.system() == "Darwin" and platform.machine() == "x86_64") and platform.system() != "Windows": from buzz.widgets.transcription_viewer.speaker_identification_widget import ( SpeakerIdentificationWidget, IdentificationWorker, @@ -19,8 +19,8 @@ if not (platform.system() == "Darwin" and platform.machine() == "x86_64"): from tests.audio import test_audio_path @pytest.mark.skipif( - platform.system() == "Darwin" and platform.machine() == "x86_64", - reason="Skip speaker identification tests on macOS x86_64" + (platform.system() == "Darwin" and platform.machine() == "x86_64") or platform.system() == "Windows", + reason="Speaker identification dependencies (nemo/texterrors C extensions) crash on Windows and are unsupported on Intel Mac" ) class TestSpeakerIdentificationWidget: @pytest.fixture()