Fix for windows crashes (#1387)

This commit is contained in:
Raivis Dejus 2026-02-20 15:47:13 +02:00 committed by GitHub
commit bb546acbf9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 171 additions and 74 deletions

View file

@ -94,6 +94,8 @@ jobs:
run: |
uv run make test
shell: bash
env:
PYTHONFAULTHANDLER: "1"
- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v4
@ -174,10 +176,10 @@ jobs:
- name: Install dependencies
run: uv sync
- uses: AnimMouse/setup-ffmpeg@v1.2.1
- uses: AnimMouse/setup-ffmpeg@v1
id: setup-ffmpeg
with:
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '71' || '7.1' }}
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}
- name: Install MSVC for Windows
run: |

View file

@ -24,23 +24,11 @@ jobs:
# Ideas from https://github.com/orgs/community/discussions/25678
- name: Remove unused build tools
run: |
sudo apt-get remove -y '^llvm-.*'
sudo apt-get remove -y 'php.*'
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true
sudo apt-get autoremove -y
sudo apt-get clean
python -m pip cache purge
rm -rf /opt/hostedtoolcache || true
- name: Maximize build space
uses: easimon/maximize-build-space@master
with:
root-reserve-mb: 45000
swap-size-mb: 1024
remove-dotnet: 'true'
remove-android: 'true'
remove-haskell: 'true'
remove-codeql: 'true'
remove-docker-images: 'true'
- name: Check available disk space
run: |
echo "=== Disk space ==="

View file

@ -27,9 +27,9 @@ class RecordingAmplitudeListener(QObject):
callback=self.stream_callback,
)
self.stream.start()
except sounddevice.PortAudioError:
except Exception as e:
self.stop_recording()
logging.exception("")
logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e)
def stop_recording(self):
if self.stream is not None:

View file

@ -302,7 +302,7 @@ class RecordingTranscriber(QObject):
next_text: str = result.get("text")
# Update initial prompt between successive recording chunks
initial_prompt += next_text
initial_prompt = next_text
logging.debug(
"Received next result, length = %s, time taken = %s",
@ -315,7 +315,11 @@ class RecordingTranscriber(QObject):
except PortAudioError as exc:
self.error.emit(str(exc))
logging.exception("")
logging.exception("PortAudio error during recording")
return
except Exception as exc:
logging.exception("Unexpected error during recording")
self.error.emit(str(exc))
return
self.finished.emit()
@ -361,7 +365,11 @@ class RecordingTranscriber(QObject):
self.is_running = False
if self.process and self.process.poll() is None:
self.process.terminate()
self.process.wait(timeout=5)
try:
self.process.wait(timeout=5)
except subprocess.TimeoutExpired:
self.process.kill()
logging.warning("Whisper server process had to be killed after timeout")
def start_local_whisper_server(self):
# Reduce verbose HTTP client logging from OpenAI/httpx
@ -466,4 +474,7 @@ class RecordingTranscriber(QObject):
def __del__(self):
if self.process and self.process.poll() is None:
self.process.terminate()
self.process.wait(timeout=5)
try:
self.process.wait(timeout=5)
except subprocess.TimeoutExpired:
self.process.kill()

View file

@ -76,4 +76,4 @@ class AudioMeterWidget(QWidget):
self.current_amplitude = max(
amplitude, self.current_amplitude * self.SMOOTHING_FACTOR
)
self.repaint()
self.update()

View file

@ -1,6 +1,7 @@
import os
import re
import enum
import time
import requests
import logging
import datetime
@ -212,7 +213,7 @@ class RecordingTranscriberWidget(QWidget):
self.presentation_options_bar.hide()
self.copy_actions_bar = self.create_copy_actions_bar()
layout.addWidget(self.copy_actions_bar) # Add at the bottom
self.copy_actions_bar.hide()
self.copy_actions_bar.hide()
def create_presentation_options_bar(self) -> QWidget:
"""Create the presentation options bar widget"""
@ -296,15 +297,15 @@ class RecordingTranscriberWidget(QWidget):
layout = QHBoxLayout(bar)
layout.setContentsMargins(5, 5, 5, 5)
layout.setSpacing(10)
layout.addStretch() # Push button to the right
self.copy_transcript_button = QPushButton(_("Copy"), bar)
self.copy_transcript_button.setToolTip(_("Copy transcription to clipboard"))
self.copy_transcript_button.clicked.connect(self.on_copy_transcript_clicked)
layout.addWidget(self.copy_transcript_button)
return bar
return bar
def on_copy_transcript_clicked(self):
"""Handle copy transcript button click"""
@ -339,7 +340,7 @@ class RecordingTranscriberWidget(QWidget):
self.copy_transcript_button.setText(_("Copied!"))
QTimer.singleShot(2000, lambda: self.copy_transcript_button.setText(_("Copy")))
def on_show_presentation_clicked(self):
"""Handle click on 'Show in new window' button"""
if self.presentation_window is None or not self.presentation_window.isVisible():
@ -668,6 +669,40 @@ class RecordingTranscriberWidget(QWidget):
return text
@staticmethod
def write_to_export_file(file_path: str, content: str, mode: str = "a", retries: int = 5, delay: float = 0.2):
    """Write ``content`` to an export file, retrying while the file is locked.

    A ``PermissionError`` (typical on Windows when another process holds the
    file open) is retried after sleeping ``delay`` seconds. Any other
    ``OSError`` is logged and the write is abandoned immediately. Failures are
    logged as warnings and never raised to the caller.

    Note that the sleep between attempts blocks the calling thread, for up to
    ``(retries - 1) * delay`` seconds in the worst case.

    Args:
        file_path: Path of the export file.
        content: Text to write; the file is opened with UTF-8 encoding.
        mode: Mode passed to ``open`` ("a" appends, "w" overwrites).
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the write is always attempted at least once.
        delay: Seconds to sleep between attempts.
    """
    # Guard against retries <= 0, which would otherwise skip the write
    # entirely without logging anything.
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            with open(file_path, mode, encoding="utf-8") as f:
                f.write(content)
            return
        except PermissionError:
            if attempt < attempts:
                time.sleep(delay)
            else:
                logging.warning("Export write failed after %d retries: %s", attempts, file_path)
        except OSError as e:
            # Not a locking problem (missing directory, disk full, ...):
            # retrying will not help.
            logging.warning("Export write failed: %s", e)
            return
@staticmethod
def read_export_file(file_path: str, retries: int = 5, delay: float = 0.2) -> str:
    """Read an export file as text, retrying while the file is locked.

    A ``PermissionError`` (typical on Windows when another process holds the
    file open) is retried after sleeping ``delay`` seconds. Any other
    ``OSError`` is logged and ends the read immediately. Failures are never
    raised to the caller.

    Args:
        file_path: Path of the export file.
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the read is always attempted at least once.
        delay: Seconds to sleep between attempts.

    Returns:
        The file content decoded as UTF-8, or an empty string if the file
        could not be read. Bytes that are not valid UTF-8 are returned as
        U+FFFD replacement characters.
    """
    # Guard against retries <= 0, which would otherwise skip the read entirely.
    attempts = max(1, retries)
    for attempt in range(1, attempts + 1):
        try:
            # errors="replace": a file that was not written as UTF-8 (e.g.
            # created with the platform default encoding) would otherwise
            # raise UnicodeDecodeError, which is a ValueError and is not
            # covered by the OSError handlers below.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                return f.read()
        except PermissionError:
            if attempt < attempts:
                time.sleep(delay)
            else:
                logging.warning("Export read failed after %d retries: %s", attempts, file_path)
        except OSError as e:
            # Not a locking problem (missing file, ...): retrying will not help.
            logging.warning("Export read failed: %s", e)
            return ""
    return ""
# Copilot magic implementation of a sliding window approach to find the longest common substring between two texts,
# ignoring the initial differences.
@staticmethod
@ -722,8 +757,7 @@ class RecordingTranscriberWidget(QWidget):
text_box.moveCursor(QTextCursor.MoveOperation.End)
if self.export_enabled and export_file:
with open(export_file, "w") as f:
f.write(merged_texts)
self.write_to_export_file(export_file, merged_texts, mode="w")
def on_next_transcription(self, text: str):
text = self.filter_text(text)
@ -742,8 +776,7 @@ class RecordingTranscriberWidget(QWidget):
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.End)
if self.export_enabled and self.transcript_export_file:
with open(self.transcript_export_file, "a") as f:
f.write(text + "\n\n")
self.write_to_export_file(self.transcript_export_file, text + "\n\n")
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@ -752,13 +785,11 @@ class RecordingTranscriberWidget(QWidget):
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
if self.export_enabled and self.transcript_export_file:
with open(self.transcript_export_file, "r") as f:
existing_content = f.read()
existing_content = ""
if os.path.isfile(self.transcript_export_file):
existing_content = self.read_export_file(self.transcript_export_file)
new_content = text + "\n\n" + existing_content
with open(self.transcript_export_file, "w") as f:
f.write(new_content)
self.write_to_export_file(self.transcript_export_file, new_content, mode="w")
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
self.process_transcription_merge(text, self.transcripts, self.transcription_text_box, self.transcript_export_file)
@ -792,9 +823,8 @@ class RecordingTranscriberWidget(QWidget):
self.translation_text_box.insertPlainText(self.strip_newlines(text))
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.End)
if self.export_enabled:
with open(self.translation_export_file, "a") as f:
f.write(text + "\n\n")
if self.export_enabled and self.translation_export_file:
self.write_to_export_file(self.translation_export_file, text + "\n\n")
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
@ -802,14 +832,12 @@ class RecordingTranscriberWidget(QWidget):
self.translation_text_box.insertPlainText("\n\n")
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
if self.export_enabled:
with open(self.translation_export_file, "r") as f:
existing_content = f.read()
if self.export_enabled and self.translation_export_file:
existing_content = ""
if os.path.isfile(self.translation_export_file):
existing_content = self.read_export_file(self.translation_export_file)
new_content = text + "\n\n" + existing_content
with open(self.translation_export_file, "w") as f:
f.write(new_content)
self.write_to_export_file(self.translation_export_file, new_content, mode="w")
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
self.process_transcription_merge(text, self.translations, self.translation_text_box, self.translation_export_file)
@ -842,6 +870,7 @@ class RecordingTranscriberWidget(QWidget):
def on_transcriber_finished(self):
    """Reset the record button, then reset the recording amplitude listener."""
    self.reset_record_button()
    # Restart amplitude listener now that the transcription stream is closed
    self.reset_recording_amplitude_listener()
def on_transcriber_error(self, error: str):
@ -899,6 +928,16 @@ class RecordingTranscriberWidget(QWidget):
self.model_loader.cancel()
self.stop_recording()
if self.transcription_thread is not None:
try:
if self.transcription_thread.isRunning():
if not self.transcription_thread.wait(15_000):
logging.warning("Transcription thread did not finish within timeout")
except RuntimeError:
# The underlying C++ QThread was already deleted via deleteLater()
pass
self.transcription_thread = None
if self.recording_amplitude_listener is not None:
self.recording_amplitude_listener.stop_recording()
self.recording_amplitude_listener.deleteLater()

View file

@ -45,23 +45,6 @@ from buzz.settings.settings import Settings
from buzz.widgets.line_edit import LineEdit
from buzz.transcriber.transcriber import Segment
from ctc_forced_aligner.ctc_forced_aligner import (
generate_emissions,
get_alignments,
get_spans,
load_alignment_model,
postprocess_results,
preprocess_text,
)
from whisper_diarization.helpers import (
get_realigned_ws_mapping_with_punctuation,
get_sentences_speaker_mapping,
get_words_speaker_mapping,
langs_to_iso,
punct_model_langs,
)
from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
from whisper_diarization.diarization import MSDDDiarizer
def process_in_batches(
@ -167,6 +150,32 @@ class IdentificationWorker(QObject):
}
def run(self):
try:
from ctc_forced_aligner.ctc_forced_aligner import (
generate_emissions,
get_alignments,
get_spans,
load_alignment_model,
postprocess_results,
preprocess_text,
)
from whisper_diarization.helpers import (
get_realigned_ws_mapping_with_punctuation,
get_sentences_speaker_mapping,
get_words_speaker_mapping,
langs_to_iso,
punct_model_langs,
)
from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
from whisper_diarization.diarization import MSDDDiarizer
except ImportError as e:
logging.exception("Failed to import speaker identification libraries: %s", e)
self.error.emit(
_("Speaker identification is not available: failed to load required libraries.")
+ f"\n\n{e}"
)
return
diarizer_model = None
alignment_model = None

View file

@ -82,6 +82,42 @@ class CustomBuildHook(BuildHookInterface):
# Build ctc_forced_aligner C++ extension in-place
print("Building ctc_forced_aligner C++ extension...")
ctc_aligner_dir = project_root / "ctc_forced_aligner"
# Apply local patches before building.
# Uses --check first to avoid touching the working tree unnecessarily,
# which is safer in a detached-HEAD submodule.
patches_dir = project_root / "patches"
for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
# Dry-run forward: succeeds only if patch is NOT yet applied.
check_forward = subprocess.run(
["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
capture_output=True,
text=True,
)
if check_forward.returncode == 0:
# Patch can be applied — do it for real.
subprocess.run(
["git", "apply", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
check=True,
capture_output=True,
text=True,
)
print(f"Applied patch: {patch_file.name}")
else:
# Dry-run failed — either already applied or genuinely broken.
check_reverse = subprocess.run(
["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
capture_output=True,
text=True,
)
if check_reverse.returncode == 0:
print(f"Patch already applied (skipping): {patch_file.name}")
else:
print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)
result = subprocess.run(
[sys.executable, "setup.py", "build_ext", "--inplace"],
cwd=ctc_aligner_dir,

View file

@ -0,0 +1,16 @@
diff --git a/setup.py b/setup.py
index de84a25..386f662 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,10 @@ ext_modules = [
Pybind11Extension(
"ctc_forced_aligner.ctc_forced_aligner",
["ctc_forced_aligner/forced_align_impl.cpp"],
- extra_compile_args=["/O2"] if sys.platform == "win32" else ["-O3"],
+ # /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR prevents MSVC runtime mutex
+ # static-initializer crash on newer GitHub Actions Windows runners.
+ # See: https://github.com/actions/runner-images/issues/10004
+ extra_compile_args=["/O2", "/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"] if sys.platform == "win32" else ["-O3"],
)
]

View file

@ -1,12 +1,8 @@
import os
import time
import logging
from threading import Thread, Event
from typing import Callable, Any
from unittest.mock import MagicMock
import numpy as np
import sounddevice
from buzz import whisper_audio
@ -103,7 +99,7 @@ class MockInputStream:
def __init__(
self,
callback: Callable[[np.ndarray, int, Any, sounddevice.CallbackFlags], None],
callback: Callable[[np.ndarray, int, Any, Any], None],
*args,
**kwargs,
):
@ -131,7 +127,7 @@ class MockInputStream:
if self._stop_event.is_set():
break
chunk = audio[seek : seek + num_samples_in_chunk]
self.callback(chunk, 0, None, sounddevice.CallbackFlags())
self.callback(chunk, 0, None, None)
seek += num_samples_in_chunk
# loop back around

View file

@ -9,8 +9,8 @@ from buzz.db.entity.transcription import Transcription
from buzz.db.entity.transcription_segment import TranscriptionSegment
from buzz.model_loader import ModelType, WhisperModelSize
from buzz.transcriber.transcriber import Task
# Underlying libs do not support intel Macs
if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
# Underlying libs do not support intel Macs or Windows (nemo C extensions crash on Windows CI)
if not (platform.system() == "Darwin" and platform.machine() == "x86_64") and platform.system() != "Windows":
from buzz.widgets.transcription_viewer.speaker_identification_widget import (
SpeakerIdentificationWidget,
IdentificationWorker,
@ -19,8 +19,8 @@ if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
from tests.audio import test_audio_path
@pytest.mark.skipif(
platform.system() == "Darwin" and platform.machine() == "x86_64",
reason="Skip speaker identification tests on macOS x86_64"
(platform.system() == "Darwin" and platform.machine() == "x86_64") or platform.system() == "Windows",
reason="Speaker identification dependencies (nemo/texterrors C extensions) crash on Windows and are unsupported on Intel Mac"
)
class TestSpeakerIdentificationWidget:
@pytest.fixture()