buzz/tests/transcriber/recording_transcriber_test.py
2026-03-07 14:48:41 +00:00

551 lines
20 KiB
Python

import os
import sys
import time
import numpy as np
from unittest.mock import Mock, patch, MagicMock
from PyQt6.QtCore import QThread
from buzz.locale import _
from buzz.assets import APP_BASE_DIR
from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize
from buzz.transcriber.recording_transcriber import RecordingTranscriber
from buzz.transcriber.transcriber import TranscriptionOptions, Task
from buzz.settings.recording_transcriber_mode import RecordingTranscriberMode
from tests.mock_sounddevice import MockSoundDevice
from tests.model_loader import get_model_path
class TestAmplitude:
def test_symmetric_array(self):
arr = np.array([1.0, -1.0, 2.0, -2.0])
amplitude = RecordingTranscriber.amplitude(arr)
# RMS: sqrt(mean([1, 1, 4, 4])) = sqrt(2.5) ≈ 1.5811
assert abs(amplitude - np.sqrt(2.5)) < 1e-6
def test_asymmetric_array(self):
arr = np.array([1.0, 2.0, 3.0, -1.0])
amplitude = RecordingTranscriber.amplitude(arr)
# RMS: sqrt(mean([1, 4, 9, 1])) = sqrt(3.75) ≈ 1.9365
assert abs(amplitude - np.sqrt(3.75)) < 1e-6
def test_all_zeros(self):
arr = np.array([0.0, 0.0, 0.0])
amplitude = RecordingTranscriber.amplitude(arr)
assert amplitude == 0.0
def test_all_positive(self):
arr = np.array([1.0, 2.0, 3.0, 4.0])
amplitude = RecordingTranscriber.amplitude(arr)
# RMS: sqrt(mean([1, 4, 9, 16])) = sqrt(7.5) ≈ 2.7386
assert abs(amplitude - np.sqrt(7.5)) < 1e-6
def test_all_negative(self):
arr = np.array([-1.0, -2.0, -3.0, -4.0])
amplitude = RecordingTranscriber.amplitude(arr)
# RMS is symmetric: same as all_positive
assert abs(amplitude - np.sqrt(7.5)) < 1e-6
def test_returns_float(self):
arr = np.array([0.5], dtype=np.float32)
amplitude = RecordingTranscriber.amplitude(arr)
assert isinstance(amplitude, float)
class TestGetDeviceSampleRate:
def test_returns_default_16khz_when_supported(self):
with patch("sounddevice.check_input_settings"):
rate = RecordingTranscriber.get_device_sample_rate(None)
assert rate == 16000
def test_falls_back_to_device_default(self):
import sounddevice
from sounddevice import PortAudioError
def raise_error(*args, **kwargs):
raise PortAudioError("Device doesn't support 16000")
device_info = {"default_samplerate": 44100}
with patch("sounddevice.check_input_settings", side_effect=raise_error), \
patch("sounddevice.query_devices", return_value=device_info):
rate = RecordingTranscriber.get_device_sample_rate(0)
assert rate == 44100
def test_returns_default_when_query_fails(self):
from sounddevice import PortAudioError
def raise_error(*args, **kwargs):
raise PortAudioError("Device doesn't support 16000")
with patch("sounddevice.check_input_settings", side_effect=raise_error), \
patch("sounddevice.query_devices", return_value=None):
rate = RecordingTranscriber.get_device_sample_rate(0)
assert rate == 16000
class TestRecordingTranscriber:
def test_should_transcribe(self, qtbot):
with (patch("sounddevice.check_input_settings")):
thread = QThread()
transcription_model = TranscriptionModel(
model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY
)
model_path = get_model_path(transcription_model)
model_exe_path = os.path.join(APP_BASE_DIR, "whisper_cpp", "whisper-server.exe")
if sys.platform.startswith("win"):
assert os.path.exists(model_exe_path), f"{model_exe_path} does not exist"
transcriber = RecordingTranscriber(
transcription_options=TranscriptionOptions(
model=transcription_model, language="fr", task=Task.TRANSCRIBE
),
input_device_index=0,
sample_rate=16_000,
model_path=model_path,
sounddevice=MockSoundDevice(),
)
transcriber.moveToThread(thread)
thread.started.connect(transcriber.start)
transcriptions = []
def on_transcription(text):
transcriptions.append(text)
transcriber.transcription.connect(on_transcription)
thread.start()
try:
qtbot.waitUntil(lambda: len(transcriptions) == 3, timeout=120_000)
# any string in any transcription
strings_to_check = [_("Starting Whisper.cpp..."), "Bienvenue dans Passe"]
assert any(s in t for s in strings_to_check for t in transcriptions)
finally:
# Ensure cleanup runs even if waitUntil times out
transcriber.stop_recording()
time.sleep(10)
thread.quit()
thread.wait()
# Ensure process is cleaned up
if transcriber.process and transcriber.process.poll() is None:
transcriber.process.terminate()
try:
transcriber.process.wait(timeout=2)
except:
pass
# Process pending events to ensure cleanup
from PyQt6.QtCore import QCoreApplication
QCoreApplication.processEvents()
time.sleep(0.1)
class TestRecordingTranscriberInit:
def test_init_default_mode(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
assert transcriber.transcription_options == transcription_options
assert transcriber.input_device_index == 0
assert transcriber.sample_rate == 16000
assert transcriber.model_path == "/fake/path"
assert transcriber.n_batch_samples == 5 * 16000
assert transcriber.keep_sample_seconds == 0.15
assert transcriber.is_running is False
assert transcriber.openai_client is None
def test_init_append_and_correct_mode(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"), \
patch("buzz.transcriber.recording_transcriber.Settings") as mock_settings_class:
# Mock settings to return APPEND_AND_CORRECT mode (index 2 in the enum)
mock_settings_instance = MagicMock()
mock_settings_class.return_value = mock_settings_instance
# Return 2 for APPEND_AND_CORRECT mode (it's the third item in the enum)
mock_settings_instance.value.return_value = 2
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# APPEND_AND_CORRECT mode should use smaller batch size and longer keep duration
assert transcriber.n_batch_samples == int(transcription_options.transcription_step * 16000)
assert transcriber.keep_sample_seconds == 1.5
def test_init_stores_silence_threshold(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
silence_threshold=0.01,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
assert transcriber.transcription_options.silence_threshold == 0.01
def test_init_uses_default_sample_rate_when_none(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=None,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Should use default whisper sample rate
assert transcriber.sample_rate == 16000
class TestStreamCallback:
def test_stream_callback_adds_to_queue(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Create test audio data
in_data = np.array([[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)
initial_size = transcriber.queue.size
transcriber.stream_callback(in_data, 4, None, None)
# Queue should have grown by 4 samples
assert transcriber.queue.size == initial_size + 4
def test_stream_callback_emits_amplitude_changed(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Mock the amplitude_changed signal
amplitude_values = []
transcriber.amplitude_changed.connect(lambda amp: amplitude_values.append(amp))
# Create test audio data
in_data = np.array([[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)
transcriber.stream_callback(in_data, 4, None, None)
# Should have emitted one amplitude value
assert len(amplitude_values) == 1
assert amplitude_values[0] > 0
def test_stream_callback_drops_data_when_queue_full(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Fill the queue beyond max_queue_size
transcriber.queue = np.ones(transcriber.max_queue_size, dtype=np.float32)
initial_size = transcriber.queue.size
# Try to add more data
in_data = np.array([[0.1], [0.2]], dtype=np.float32)
transcriber.stream_callback(in_data, 2, None, None)
# Queue should not have grown (data was dropped)
assert transcriber.queue.size == initial_size
class TestStopRecording:
def test_stop_recording_sets_is_running_false(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
transcriber.is_running = True
transcriber.stop_recording()
assert transcriber.is_running is False
def test_stop_recording_terminates_process(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Mock a running process
mock_process = MagicMock()
mock_process.poll.return_value = None # Process is running
transcriber.process = mock_process
transcriber.stop_recording()
# Process should have been terminated and waited
mock_process.terminate.assert_called_once()
mock_process.wait.assert_called_once_with(timeout=5)
def test_stop_recording_skips_terminated_process(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"):
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
# Mock an already terminated process
mock_process = MagicMock()
mock_process.poll.return_value = 0 # Process already terminated
transcriber.process = mock_process
transcriber.stop_recording()
# terminate and wait should not be called
mock_process.terminate.assert_not_called()
mock_process.wait.assert_not_called()
class TestStartLocalWhisperServer:
def test_start_local_whisper_server_creates_openai_client(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"), \
patch("subprocess.Popen") as mock_popen, \
patch("time.sleep"):
# Mock a successful process
mock_process = MagicMock()
mock_process.poll.return_value = None # Process is running
mock_popen.return_value = mock_process
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
try:
transcriber.is_running = True
transcriber.start_local_whisper_server()
# Should have created an OpenAI client
assert transcriber.openai_client is not None
assert transcriber.process is not None
finally:
# Clean up to prevent QThread warnings
transcriber.is_running = False
transcriber.process = None
def test_start_local_whisper_server_with_language(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="fr",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"), \
patch("subprocess.Popen") as mock_popen, \
patch("time.sleep"):
mock_process = MagicMock()
mock_process.poll.return_value = None
mock_popen.return_value = mock_process
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
try:
transcriber.is_running = True
transcriber.start_local_whisper_server()
# Check that the language was passed to the command
call_args = mock_popen.call_args
cmd = call_args[0][0]
assert "--language" in cmd
assert "fr" in cmd
finally:
transcriber.is_running = False
transcriber.process = None
def test_start_local_whisper_server_auto_language(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language=None,
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"), \
patch("subprocess.Popen") as mock_popen, \
patch("time.sleep"):
mock_process = MagicMock()
mock_process.poll.return_value = None
mock_popen.return_value = mock_process
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
try:
transcriber.is_running = True
transcriber.start_local_whisper_server()
# Check that auto language was used
call_args = mock_popen.call_args
cmd = call_args[0][0]
assert "--language" in cmd
assert "auto" in cmd
finally:
transcriber.is_running = False
transcriber.process = None
def test_start_local_whisper_server_handles_failure(self):
transcription_options = TranscriptionOptions(
model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
language="en",
task=Task.TRANSCRIBE,
)
with patch("sounddevice.check_input_settings"), \
patch("subprocess.Popen") as mock_popen, \
patch("time.sleep"):
# Mock a failed process
mock_process = MagicMock()
mock_process.poll.return_value = 1 # Process terminated with error
mock_process.stderr.read.return_value = b"Error loading model"
mock_popen.return_value = mock_process
transcriber = RecordingTranscriber(
transcription_options=transcription_options,
input_device_index=0,
sample_rate=16000,
model_path="/fake/path",
sounddevice=MockSoundDevice(),
)
transcriptions = []
transcriber.transcription.connect(lambda text: transcriptions.append(text))
try:
transcriber.is_running = True
transcriber.start_local_whisper_server()
# Should not have created a client when server failed
assert transcriber.openai_client is None
# Should have emitted starting and error messages
assert len(transcriptions) >= 1
# First message should be about starting Whisper.cpp
assert "Whisper" in transcriptions[0]
finally:
transcriber.is_running = False
transcriber.process = None