mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 14:45:46 +01:00
Fix for windows crashes (#1387)
This commit is contained in:
parent
ca8b7876fd
commit
bb546acbf9
11 changed files with 171 additions and 74 deletions
6
.github/workflows/ci.yml
vendored
6
.github/workflows/ci.yml
vendored
|
|
@ -94,6 +94,8 @@ jobs:
|
|||
run: |
|
||||
uv run make test
|
||||
shell: bash
|
||||
env:
|
||||
PYTHONFAULTHANDLER: "1"
|
||||
|
||||
- name: Upload coverage reports to Codecov with GitHub Action
|
||||
uses: codecov/codecov-action@v4
|
||||
|
|
@ -174,10 +176,10 @@ jobs:
|
|||
- name: Install dependencies
|
||||
run: uv sync
|
||||
|
||||
- uses: AnimMouse/setup-ffmpeg@v1.2.1
|
||||
- uses: AnimMouse/setup-ffmpeg@v1
|
||||
id: setup-ffmpeg
|
||||
with:
|
||||
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '71' || '7.1' }}
|
||||
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}
|
||||
|
||||
- name: Install MSVC for Windows
|
||||
run: |
|
||||
|
|
|
|||
12
.github/workflows/snapcraft.yml
vendored
12
.github/workflows/snapcraft.yml
vendored
|
|
@ -24,23 +24,11 @@ jobs:
|
|||
# Ideas from https://github.com/orgs/community/discussions/25678
|
||||
- name: Remove unused build tools
|
||||
run: |
|
||||
sudo apt-get remove -y '^llvm-.*'
|
||||
sudo apt-get remove -y 'php.*'
|
||||
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true
|
||||
sudo apt-get autoremove -y
|
||||
sudo apt-get clean
|
||||
python -m pip cache purge
|
||||
rm -rf /opt/hostedtoolcache || true
|
||||
- name: Maximize build space
|
||||
uses: easimon/maximize-build-space@master
|
||||
with:
|
||||
root-reserve-mb: 45000
|
||||
swap-size-mb: 1024
|
||||
remove-dotnet: 'true'
|
||||
remove-android: 'true'
|
||||
remove-haskell: 'true'
|
||||
remove-codeql: 'true'
|
||||
remove-docker-images: 'true'
|
||||
- name: Check available disk space
|
||||
run: |
|
||||
echo "=== Disk space ==="
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ class RecordingAmplitudeListener(QObject):
|
|||
callback=self.stream_callback,
|
||||
)
|
||||
self.stream.start()
|
||||
except sounddevice.PortAudioError:
|
||||
except Exception as e:
|
||||
self.stop_recording()
|
||||
logging.exception("")
|
||||
logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e)
|
||||
|
||||
def stop_recording(self):
|
||||
if self.stream is not None:
|
||||
|
|
|
|||
|
|
@ -302,7 +302,7 @@ class RecordingTranscriber(QObject):
|
|||
next_text: str = result.get("text")
|
||||
|
||||
# Update initial prompt between successive recording chunks
|
||||
initial_prompt += next_text
|
||||
initial_prompt = next_text
|
||||
|
||||
logging.debug(
|
||||
"Received next result, length = %s, time taken = %s",
|
||||
|
|
@ -315,7 +315,11 @@ class RecordingTranscriber(QObject):
|
|||
|
||||
except PortAudioError as exc:
|
||||
self.error.emit(str(exc))
|
||||
logging.exception("")
|
||||
logging.exception("PortAudio error during recording")
|
||||
return
|
||||
except Exception as exc:
|
||||
logging.exception("Unexpected error during recording")
|
||||
self.error.emit(str(exc))
|
||||
return
|
||||
|
||||
self.finished.emit()
|
||||
|
|
@ -361,7 +365,11 @@ class RecordingTranscriber(QObject):
|
|||
self.is_running = False
|
||||
if self.process and self.process.poll() is None:
|
||||
self.process.terminate()
|
||||
self.process.wait(timeout=5)
|
||||
try:
|
||||
self.process.wait(timeout=5)
|
||||
except subprocess.TimeoutExpired:
|
||||
self.process.kill()
|
||||
logging.warning("Whisper server process had to be killed after timeout")
|
||||
|
||||
def start_local_whisper_server(self):
|
||||
# Reduce verbose HTTP client logging from OpenAI/httpx
|
||||
|
|
@ -466,4 +474,7 @@ class RecordingTranscriber(QObject):
|
|||
def __del__(self):
    """Stop the child process, if one is still running, when this object is finalized.

    The process is asked to terminate and given five seconds to exit; if it
    does not, it is killed and then reaped so no zombie process is left behind.
    """
    # A finalizer can run on an instance whose __init__ never completed, so the
    # attribute may be missing entirely.
    process = getattr(self, "process", None)
    if not process or process.poll() is not None:
        return

    process.terminate()
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        process.kill()
        # Collect the exit status after the forced kill.
        process.wait()
|
||||
|
|
@ -76,4 +76,4 @@ class AudioMeterWidget(QWidget):
|
|||
self.current_amplitude = max(
|
||||
amplitude, self.current_amplitude * self.SMOOTHING_FACTOR
|
||||
)
|
||||
self.repaint()
|
||||
self.update()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import os
|
||||
import re
|
||||
import enum
|
||||
import time
|
||||
import requests
|
||||
import logging
|
||||
import datetime
|
||||
|
|
@ -212,7 +213,7 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.presentation_options_bar.hide()
|
||||
self.copy_actions_bar = self.create_copy_actions_bar()
|
||||
layout.addWidget(self.copy_actions_bar) # Add at the bottom
|
||||
self.copy_actions_bar.hide()
|
||||
self.copy_actions_bar.hide()
|
||||
|
||||
def create_presentation_options_bar(self) -> QWidget:
|
||||
"""Crete the presentation options bar widget"""
|
||||
|
|
@ -296,15 +297,15 @@ class RecordingTranscriberWidget(QWidget):
|
|||
layout = QHBoxLayout(bar)
|
||||
layout.setContentsMargins(5, 5, 5, 5)
|
||||
layout.setSpacing(10)
|
||||
|
||||
|
||||
layout.addStretch() # Push button to the right
|
||||
|
||||
|
||||
self.copy_transcript_button = QPushButton(_("Copy"), bar)
|
||||
self.copy_transcript_button.setToolTip(_("Copy transcription to clipboard"))
|
||||
self.copy_transcript_button.clicked.connect(self.on_copy_transcript_clicked)
|
||||
layout.addWidget(self.copy_transcript_button)
|
||||
|
||||
return bar
|
||||
|
||||
return bar
|
||||
|
||||
def on_copy_transcript_clicked(self):
|
||||
"""Handle copy transcript button click"""
|
||||
|
|
@ -339,7 +340,7 @@ class RecordingTranscriberWidget(QWidget):
|
|||
|
||||
self.copy_transcript_button.setText(_("Copied!"))
|
||||
QTimer.singleShot(2000, lambda: self.copy_transcript_button.setText(_("Copy")))
|
||||
|
||||
|
||||
def on_show_presentation_clicked(self):
|
||||
"""Handle click on 'Show in new window' button"""
|
||||
if self.presentation_window is None or not self.presentation_window.isVisible():
|
||||
|
|
@ -668,6 +669,40 @@ class RecordingTranscriberWidget(QWidget):
|
|||
|
||||
return text
|
||||
|
||||
@staticmethod
|
||||
def write_to_export_file(file_path: str, content: str, mode: str = "a", retries: int = 5, delay: float = 0.2) -> bool:
    """Write to an export file with retry logic for Windows file locking.

    Args:
        file_path: Path of the export file.
        content: Text to write; always encoded as UTF-8.
        mode: Mode passed to ``open`` ("a" appends, "w" overwrites).
        retries: Maximum number of attempts. Values below 1 are treated as 1
            so the write is never skipped silently.
        delay: Seconds to sleep between attempts after a ``PermissionError``.

    Returns:
        True if the content was written, False if every attempt failed.
        Failures are logged as warnings and never raised.
    """
    attempts = max(1, retries)
    for attempt in range(attempts):
        try:
            with open(file_path, mode, encoding='utf-8') as f:
                f.write(content)
            return True
        except PermissionError:
            # Treated as a transient lock: wait and retry unless this was the
            # final attempt.
            if attempt < attempts - 1:
                time.sleep(delay)
            else:
                logging.warning("Export write failed after %d retries: %s", attempts, file_path)
        except OSError as e:
            # Any other OS-level error is not expected to clear on retry.
            logging.warning("Export write failed: %s", e)
            return False
    return False
|
||||
|
||||
@staticmethod
|
||||
def read_export_file(file_path: str, retries: int = 5, delay: float = 0.2) -> str:
    """Read an export file with retry logic for Windows file locking.

    Args:
        file_path: Path of the export file.
        retries: Maximum number of attempts.
        delay: Seconds to sleep between attempts after a ``PermissionError``.

    Returns:
        The file's text, or an empty string if it could not be read.
        Failures are logged as warnings and never raised.
    """
    for attempt in range(retries):
        try:
            # errors="replace": a file containing bytes that are not valid
            # UTF-8 (for example one written with a different encoding) would
            # otherwise raise UnicodeDecodeError, which is not an OSError and
            # would escape the handlers below. Undecodable bytes become U+FFFD.
            with open(file_path, "r", encoding='utf-8', errors="replace") as f:
                return f.read()
        except PermissionError:
            # Treated as a transient lock: wait and retry unless this was the
            # final attempt.
            if attempt < retries - 1:
                time.sleep(delay)
            else:
                logging.warning("Export read failed after %d retries: %s", retries, file_path)
        except OSError as e:
            # Any other OS-level error is not expected to clear on retry.
            logging.warning("Export read failed: %s", e)
            return ""
    return ""
|
||||
|
||||
# Copilot magic implementation of a sliding window approach to find the longest common substring between two texts,
|
||||
# ignoring the initial differences.
|
||||
@staticmethod
|
||||
|
|
@ -722,8 +757,7 @@ class RecordingTranscriberWidget(QWidget):
|
|||
text_box.moveCursor(QTextCursor.MoveOperation.End)
|
||||
|
||||
if self.export_enabled and export_file:
|
||||
with open(export_file, "w") as f:
|
||||
f.write(merged_texts)
|
||||
self.write_to_export_file(export_file, merged_texts, mode="w")
|
||||
|
||||
def on_next_transcription(self, text: str):
|
||||
text = self.filter_text(text)
|
||||
|
|
@ -742,8 +776,7 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.End)
|
||||
|
||||
if self.export_enabled and self.transcript_export_file:
|
||||
with open(self.transcript_export_file, "a") as f:
|
||||
f.write(text + "\n\n")
|
||||
self.write_to_export_file(self.transcript_export_file, text + "\n\n")
|
||||
|
||||
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
|
||||
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
|
||||
|
|
@ -752,13 +785,11 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.transcription_text_box.moveCursor(QTextCursor.MoveOperation.Start)
|
||||
|
||||
if self.export_enabled and self.transcript_export_file:
|
||||
with open(self.transcript_export_file, "r") as f:
|
||||
existing_content = f.read()
|
||||
|
||||
existing_content = ""
|
||||
if os.path.isfile(self.transcript_export_file):
|
||||
existing_content = self.read_export_file(self.transcript_export_file)
|
||||
new_content = text + "\n\n" + existing_content
|
||||
|
||||
with open(self.transcript_export_file, "w") as f:
|
||||
f.write(new_content)
|
||||
self.write_to_export_file(self.transcript_export_file, new_content, mode="w")
|
||||
|
||||
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
|
||||
self.process_transcription_merge(text, self.transcripts, self.transcription_text_box, self.transcript_export_file)
|
||||
|
|
@ -792,9 +823,8 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.translation_text_box.insertPlainText(self.strip_newlines(text))
|
||||
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.End)
|
||||
|
||||
if self.export_enabled:
|
||||
with open(self.translation_export_file, "a") as f:
|
||||
f.write(text + "\n\n")
|
||||
if self.export_enabled and self.translation_export_file:
|
||||
self.write_to_export_file(self.translation_export_file, text + "\n\n")
|
||||
|
||||
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_ABOVE:
|
||||
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
|
||||
|
|
@ -802,14 +832,12 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.translation_text_box.insertPlainText("\n\n")
|
||||
self.translation_text_box.moveCursor(QTextCursor.MoveOperation.Start)
|
||||
|
||||
if self.export_enabled:
|
||||
with open(self.translation_export_file, "r") as f:
|
||||
existing_content = f.read()
|
||||
|
||||
if self.export_enabled and self.translation_export_file:
|
||||
existing_content = ""
|
||||
if os.path.isfile(self.translation_export_file):
|
||||
existing_content = self.read_export_file(self.translation_export_file)
|
||||
new_content = text + "\n\n" + existing_content
|
||||
|
||||
with open(self.translation_export_file, "w") as f:
|
||||
f.write(new_content)
|
||||
self.write_to_export_file(self.translation_export_file, new_content, mode="w")
|
||||
|
||||
elif self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT:
|
||||
self.process_transcription_merge(text, self.translations, self.translation_text_box, self.translation_export_file)
|
||||
|
|
@ -842,6 +870,7 @@ class RecordingTranscriberWidget(QWidget):
|
|||
|
||||
def on_transcriber_finished(self):
|
||||
self.reset_record_button()
|
||||
# Restart amplitude listener now that the transcription stream is closed
|
||||
self.reset_recording_amplitude_listener()
|
||||
|
||||
def on_transcriber_error(self, error: str):
|
||||
|
|
@ -899,6 +928,16 @@ class RecordingTranscriberWidget(QWidget):
|
|||
self.model_loader.cancel()
|
||||
|
||||
self.stop_recording()
|
||||
if self.transcription_thread is not None:
|
||||
try:
|
||||
if self.transcription_thread.isRunning():
|
||||
if not self.transcription_thread.wait(15_000):
|
||||
logging.warning("Transcription thread did not finish within timeout")
|
||||
except RuntimeError:
|
||||
# The underlying C++ QThread was already deleted via deleteLater()
|
||||
pass
|
||||
self.transcription_thread = None
|
||||
|
||||
if self.recording_amplitude_listener is not None:
|
||||
self.recording_amplitude_listener.stop_recording()
|
||||
self.recording_amplitude_listener.deleteLater()
|
||||
|
|
|
|||
|
|
@ -45,23 +45,6 @@ from buzz.settings.settings import Settings
|
|||
from buzz.widgets.line_edit import LineEdit
|
||||
from buzz.transcriber.transcriber import Segment
|
||||
|
||||
from ctc_forced_aligner.ctc_forced_aligner import (
|
||||
generate_emissions,
|
||||
get_alignments,
|
||||
get_spans,
|
||||
load_alignment_model,
|
||||
postprocess_results,
|
||||
preprocess_text,
|
||||
)
|
||||
from whisper_diarization.helpers import (
|
||||
get_realigned_ws_mapping_with_punctuation,
|
||||
get_sentences_speaker_mapping,
|
||||
get_words_speaker_mapping,
|
||||
langs_to_iso,
|
||||
punct_model_langs,
|
||||
)
|
||||
from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
|
||||
from whisper_diarization.diarization import MSDDDiarizer
|
||||
|
||||
|
||||
def process_in_batches(
|
||||
|
|
@ -167,6 +150,32 @@ class IdentificationWorker(QObject):
|
|||
}
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
from ctc_forced_aligner.ctc_forced_aligner import (
|
||||
generate_emissions,
|
||||
get_alignments,
|
||||
get_spans,
|
||||
load_alignment_model,
|
||||
postprocess_results,
|
||||
preprocess_text,
|
||||
)
|
||||
from whisper_diarization.helpers import (
|
||||
get_realigned_ws_mapping_with_punctuation,
|
||||
get_sentences_speaker_mapping,
|
||||
get_words_speaker_mapping,
|
||||
langs_to_iso,
|
||||
punct_model_langs,
|
||||
)
|
||||
from deepmultilingualpunctuation.deepmultilingualpunctuation import PunctuationModel
|
||||
from whisper_diarization.diarization import MSDDDiarizer
|
||||
except ImportError as e:
|
||||
logging.exception("Failed to import speaker identification libraries: %s", e)
|
||||
self.error.emit(
|
||||
_("Speaker identification is not available: failed to load required libraries.")
|
||||
+ f"\n\n{e}"
|
||||
)
|
||||
return
|
||||
|
||||
diarizer_model = None
|
||||
alignment_model = None
|
||||
|
||||
|
|
|
|||
|
|
@ -82,6 +82,42 @@ class CustomBuildHook(BuildHookInterface):
|
|||
# Build ctc_forced_aligner C++ extension in-place
|
||||
print("Building ctc_forced_aligner C++ extension...")
|
||||
ctc_aligner_dir = project_root / "ctc_forced_aligner"
|
||||
|
||||
# Apply local patches before building.
|
||||
# Uses --check first to avoid touching the working tree unnecessarily,
|
||||
# which is safer in a detached-HEAD submodule.
|
||||
patches_dir = project_root / "patches"
|
||||
for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
|
||||
# Dry-run forward: succeeds only if patch is NOT yet applied.
|
||||
check_forward = subprocess.run(
|
||||
["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
|
||||
cwd=ctc_aligner_dir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if check_forward.returncode == 0:
|
||||
# Patch can be applied — do it for real.
|
||||
subprocess.run(
|
||||
["git", "apply", "--ignore-whitespace", str(patch_file)],
|
||||
cwd=ctc_aligner_dir,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
print(f"Applied patch: {patch_file.name}")
|
||||
else:
|
||||
# Dry-run failed — either already applied or genuinely broken.
|
||||
check_reverse = subprocess.run(
|
||||
["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
|
||||
cwd=ctc_aligner_dir,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
)
|
||||
if check_reverse.returncode == 0:
|
||||
print(f"Patch already applied (skipping): {patch_file.name}")
|
||||
else:
|
||||
print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)
|
||||
|
||||
result = subprocess.run(
|
||||
[sys.executable, "setup.py", "build_ext", "--inplace"],
|
||||
cwd=ctc_aligner_dir,
|
||||
|
|
|
|||
16
patches/ctc_forced_aligner_windows_mutex.patch
Normal file
16
patches/ctc_forced_aligner_windows_mutex.patch
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
diff --git a/setup.py b/setup.py
|
||||
index de84a25..386f662 100644
|
||||
--- a/setup.py
|
||||
+++ b/setup.py
|
||||
@@ -6,7 +6,10 @@ ext_modules = [
|
||||
Pybind11Extension(
|
||||
"ctc_forced_aligner.ctc_forced_aligner",
|
||||
["ctc_forced_aligner/forced_align_impl.cpp"],
|
||||
- extra_compile_args=["/O2"] if sys.platform == "win32" else ["-O3"],
|
||||
+ # /D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR prevents MSVC runtime mutex
|
||||
+ # static-initializer crash on newer GitHub Actions Windows runners.
|
||||
+ # See: https://github.com/actions/runner-images/issues/10004
|
||||
+ extra_compile_args=["/O2", "/D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR"] if sys.platform == "win32" else ["-O3"],
|
||||
)
|
||||
]
|
||||
|
||||
|
|
@ -1,12 +1,8 @@
|
|||
import os
|
||||
import time
|
||||
import logging
|
||||
from threading import Thread, Event
|
||||
from typing import Callable, Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import numpy as np
|
||||
import sounddevice
|
||||
|
||||
from buzz import whisper_audio
|
||||
|
||||
|
|
@ -103,7 +99,7 @@ class MockInputStream:
|
|||
|
||||
def __init__(
|
||||
self,
|
||||
callback: Callable[[np.ndarray, int, Any, sounddevice.CallbackFlags], None],
|
||||
callback: Callable[[np.ndarray, int, Any, Any], None],
|
||||
*args,
|
||||
**kwargs,
|
||||
):
|
||||
|
|
@ -131,7 +127,7 @@ class MockInputStream:
|
|||
if self._stop_event.is_set():
|
||||
break
|
||||
chunk = audio[seek : seek + num_samples_in_chunk]
|
||||
self.callback(chunk, 0, None, sounddevice.CallbackFlags())
|
||||
self.callback(chunk, 0, None, None)
|
||||
seek += num_samples_in_chunk
|
||||
|
||||
# loop back around
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ from buzz.db.entity.transcription import Transcription
|
|||
from buzz.db.entity.transcription_segment import TranscriptionSegment
|
||||
from buzz.model_loader import ModelType, WhisperModelSize
|
||||
from buzz.transcriber.transcriber import Task
|
||||
# Underlying libs do not support intel Macs
|
||||
if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
|
||||
# Underlying libs do not support intel Macs or Windows (nemo C extensions crash on Windows CI)
|
||||
if not (platform.system() == "Darwin" and platform.machine() == "x86_64") and platform.system() != "Windows":
|
||||
from buzz.widgets.transcription_viewer.speaker_identification_widget import (
|
||||
SpeakerIdentificationWidget,
|
||||
IdentificationWorker,
|
||||
|
|
@ -19,8 +19,8 @@ if not (platform.system() == "Darwin" and platform.machine() == "x86_64"):
|
|||
from tests.audio import test_audio_path
|
||||
|
||||
@pytest.mark.skipif(
|
||||
platform.system() == "Darwin" and platform.machine() == "x86_64",
|
||||
reason="Skip speaker identification tests on macOS x86_64"
|
||||
(platform.system() == "Darwin" and platform.machine() == "x86_64") or platform.system() == "Windows",
|
||||
reason="Speaker identification dependencies (nemo/texterrors C extensions) crash on Windows and are unsupported on Intel Mac"
|
||||
)
|
||||
class TestSpeakerIdentificationWidget:
|
||||
@pytest.fixture()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue