mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 14:45:46 +01:00
551 lines
20 KiB
Python
551 lines
20 KiB
Python
import os
|
|
import sys
|
|
import time
|
|
import numpy as np
|
|
from unittest.mock import Mock, patch, MagicMock
|
|
|
|
from PyQt6.QtCore import QThread
|
|
|
|
from buzz.locale import _
|
|
from buzz.assets import APP_BASE_DIR
|
|
from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize
|
|
from buzz.transcriber.recording_transcriber import RecordingTranscriber
|
|
from buzz.transcriber.transcriber import TranscriptionOptions, Task
|
|
from buzz.settings.recording_transcriber_mode import RecordingTranscriberMode
|
|
from tests.mock_sounddevice import MockSoundDevice
|
|
from tests.model_loader import get_model_path
|
|
|
|
|
|
class TestAmplitude:
    """Checks that ``RecordingTranscriber.amplitude`` matches the RMS of its input."""

    # Largest absolute deviation tolerated between computed and expected RMS.
    TOLERANCE = 1e-6

    def test_symmetric_array(self):
        """Mirrored samples: sqrt(mean([1, 1, 4, 4])) = sqrt(2.5) ≈ 1.5811."""
        samples = np.array([1.0, -1.0, 2.0, -2.0])
        expected = np.sqrt(2.5)

        result = RecordingTranscriber.amplitude(samples)

        assert abs(result - expected) < self.TOLERANCE

    def test_asymmetric_array(self):
        """Unbalanced samples: sqrt(mean([1, 4, 9, 1])) = sqrt(3.75) ≈ 1.9365."""
        samples = np.array([1.0, 2.0, 3.0, -1.0])
        expected = np.sqrt(3.75)

        result = RecordingTranscriber.amplitude(samples)

        assert abs(result - expected) < self.TOLERANCE

    def test_all_zeros(self):
        """Silence has an amplitude of exactly zero."""
        samples = np.array([0.0, 0.0, 0.0])

        result = RecordingTranscriber.amplitude(samples)

        assert result == 0.0

    def test_all_positive(self):
        """Positive samples: sqrt(mean([1, 4, 9, 16])) = sqrt(7.5) ≈ 2.7386."""
        samples = np.array([1.0, 2.0, 3.0, 4.0])
        expected = np.sqrt(7.5)

        result = RecordingTranscriber.amplitude(samples)

        assert abs(result - expected) < self.TOLERANCE

    def test_all_negative(self):
        """RMS ignores sign, so the result equals the all-positive case."""
        samples = np.array([-1.0, -2.0, -3.0, -4.0])
        expected = np.sqrt(7.5)

        result = RecordingTranscriber.amplitude(samples)

        assert abs(result - expected) < self.TOLERANCE

    def test_returns_float(self):
        """A float32 input still produces a built-in ``float`` result."""
        samples = np.array([0.5], dtype=np.float32)

        result = RecordingTranscriber.amplitude(samples)

        assert isinstance(result, float)
|
|
|
|
|
|
class TestGetDeviceSampleRate:
    """Covers ``RecordingTranscriber.get_device_sample_rate`` for each outcome."""

    def test_returns_default_16khz_when_supported(self):
        """When the input-settings check passes, 16 kHz is returned."""
        with patch("sounddevice.check_input_settings"):
            rate = RecordingTranscriber.get_device_sample_rate(None)

        assert rate == 16000

    def test_falls_back_to_device_default(self):
        """A rejected 16 kHz check falls back to the device's default rate."""
        from sounddevice import PortAudioError

        # An exception instance as side_effect makes the mock raise it on call.
        unsupported = PortAudioError("Device doesn't support 16000")
        device_info = {"default_samplerate": 44100}

        with patch("sounddevice.check_input_settings", side_effect=unsupported), \
                patch("sounddevice.query_devices", return_value=device_info):
            rate = RecordingTranscriber.get_device_sample_rate(0)

        assert rate == 44100

    def test_returns_default_when_query_fails(self):
        """If the device query yields nothing usable, 16 kHz is still returned."""
        from sounddevice import PortAudioError

        unsupported = PortAudioError("Device doesn't support 16000")

        with patch("sounddevice.check_input_settings", side_effect=unsupported), \
                patch("sounddevice.query_devices", return_value=None):
            rate = RecordingTranscriber.get_device_sample_rate(0)

        assert rate == 16000
|
|
|
|
|
|
class TestRecordingTranscriber:
    """End-to-end test that runs a ``RecordingTranscriber`` on a worker thread."""

    def test_should_transcribe(self, qtbot):
        """Start the transcriber on a QThread and check the emitted transcriptions.

        Uses the tiny Whisper.cpp model and ``MockSoundDevice`` as the audio
        source, waits for three ``transcription`` emissions, and always tears
        down the thread and any still-running server process afterwards.
        """
        with patch("sounddevice.check_input_settings"):
            thread = QThread()

            transcription_model = TranscriptionModel(
                model_type=ModelType.WHISPER_CPP,
                whisper_model_size=WhisperModelSize.TINY,
            )

            model_path = get_model_path(transcription_model)

            # The server executable is only required to exist on Windows.
            model_exe_path = os.path.join(APP_BASE_DIR, "whisper_cpp", "whisper-server.exe")
            if sys.platform.startswith("win"):
                assert os.path.exists(model_exe_path), f"{model_exe_path} does not exist"

            transcriber = RecordingTranscriber(
                transcription_options=TranscriptionOptions(
                    model=transcription_model, language="fr", task=Task.TRANSCRIBE
                ),
                input_device_index=0,
                sample_rate=16_000,
                model_path=model_path,
                sounddevice=MockSoundDevice(),
            )
            transcriber.moveToThread(thread)

            thread.started.connect(transcriber.start)

            transcriptions = []

            def on_transcription(text):
                transcriptions.append(text)

            transcriber.transcription.connect(on_transcription)

            thread.start()
            try:
                qtbot.waitUntil(lambda: len(transcriptions) == 3, timeout=120_000)

                # any string in any transcription
                strings_to_check = [_("Starting Whisper.cpp..."), "Bienvenue dans Passe"]
                assert any(s in t for s in strings_to_check for t in transcriptions)
            finally:
                # Ensure cleanup runs even if waitUntil times out
                transcriber.stop_recording()
                # NOTE(review): presumably gives the worker time to wind down;
                # the reason for 10 seconds is not visible here - confirm.
                time.sleep(10)

                thread.quit()
                thread.wait()

                # Ensure process is cleaned up
                if transcriber.process and transcriber.process.poll() is None:
                    transcriber.process.terminate()
                    try:
                        transcriber.process.wait(timeout=2)
                    except Exception:
                        # Best-effort cleanup: ignore wait failures, but let
                        # KeyboardInterrupt/SystemExit propagate (a bare
                        # ``except:`` would have swallowed those too).
                        pass

                # Process pending events to ensure cleanup
                from PyQt6.QtCore import QCoreApplication

                QCoreApplication.processEvents()
                time.sleep(0.1)
|
|
|
|
|
|
class TestRecordingTranscriberInit:
    """Verifies the attributes established by ``RecordingTranscriber.__init__``."""

    @staticmethod
    def _make_options(**extra):
        """Return English Whisper.cpp transcribe options plus any extra fields."""
        return TranscriptionOptions(
            model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
            language="en",
            task=Task.TRANSCRIBE,
            **extra,
        )

    @staticmethod
    def _make_transcriber(options, sample_rate=16000):
        """Construct a transcriber on device 0 with a fake model path and mock audio."""
        return RecordingTranscriber(
            transcription_options=options,
            input_device_index=0,
            sample_rate=sample_rate,
            model_path="/fake/path",
            sounddevice=MockSoundDevice(),
        )

    def test_init_default_mode(self):
        """Constructor arguments are stored and default-mode batch values apply."""
        options = self._make_options()

        with patch("sounddevice.check_input_settings"):
            transcriber = self._make_transcriber(options)

        assert transcriber.transcription_options == options
        assert transcriber.input_device_index == 0
        assert transcriber.sample_rate == 16000
        assert transcriber.model_path == "/fake/path"
        assert transcriber.n_batch_samples == 5 * 16000
        assert transcriber.keep_sample_seconds == 0.15
        assert transcriber.is_running is False
        assert transcriber.openai_client is None

    def test_init_append_and_correct_mode(self):
        """APPEND_AND_CORRECT mode batches by transcription step and keeps 1.5 s."""
        options = self._make_options()

        with patch("sounddevice.check_input_settings"), \
                patch("buzz.transcriber.recording_transcriber.Settings") as settings_cls:
            # The settings lookup reports 2: APPEND_AND_CORRECT is the third
            # item in the mode enum.
            settings_cls.return_value.value.return_value = 2

            transcriber = self._make_transcriber(options)

        # APPEND_AND_CORRECT mode should use smaller batch size and longer keep duration
        expected_batch = int(options.transcription_step * 16000)
        assert transcriber.n_batch_samples == expected_batch
        assert transcriber.keep_sample_seconds == 1.5

    def test_init_stores_silence_threshold(self):
        """A custom silence threshold remains available through the stored options."""
        options = self._make_options(silence_threshold=0.01)

        with patch("sounddevice.check_input_settings"):
            transcriber = self._make_transcriber(options)

        assert transcriber.transcription_options.silence_threshold == 0.01

    def test_init_uses_default_sample_rate_when_none(self):
        """Passing ``sample_rate=None`` results in the 16 kHz Whisper default."""
        options = self._make_options()

        with patch("sounddevice.check_input_settings"):
            transcriber = self._make_transcriber(options, sample_rate=None)

        # Should use default whisper sample rate
        assert transcriber.sample_rate == 16000
|
|
|
|
|
|
class TestStreamCallback:
    """Exercises ``RecordingTranscriber.stream_callback`` with small audio frames."""

    @staticmethod
    def _make_transcriber():
        """Build a 16 kHz English Whisper.cpp transcriber backed by mock audio."""
        options = TranscriptionOptions(
            model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
            language="en",
            task=Task.TRANSCRIBE,
        )
        with patch("sounddevice.check_input_settings"):
            return RecordingTranscriber(
                transcription_options=options,
                input_device_index=0,
                sample_rate=16000,
                model_path="/fake/path",
                sounddevice=MockSoundDevice(),
            )

    def test_stream_callback_adds_to_queue(self):
        """Four incoming samples grow the queue by four."""
        transcriber = self._make_transcriber()

        # Create test audio data
        frames = np.array([[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)
        size_before = transcriber.queue.size

        transcriber.stream_callback(frames, 4, None, None)

        # Queue should have grown by 4 samples
        assert transcriber.queue.size == size_before + 4

    def test_stream_callback_emits_amplitude_changed(self):
        """One callback invocation emits exactly one positive amplitude."""
        transcriber = self._make_transcriber()

        # Record every value emitted on the amplitude_changed signal
        emitted = []

        def record_amplitude(amp):
            emitted.append(amp)

        transcriber.amplitude_changed.connect(record_amplitude)

        # Create test audio data
        frames = np.array([[0.1], [0.2], [0.3], [0.4]], dtype=np.float32)
        transcriber.stream_callback(frames, 4, None, None)

        # Should have emitted one amplitude value
        assert len(emitted) == 1
        assert emitted[0] > 0

    def test_stream_callback_drops_data_when_queue_full(self):
        """Frames arriving while the queue is at max size are discarded."""
        transcriber = self._make_transcriber()

        # Fill the queue up to max_queue_size
        transcriber.queue = np.ones(transcriber.max_queue_size, dtype=np.float32)
        size_before = transcriber.queue.size

        # Try to add more data
        frames = np.array([[0.1], [0.2]], dtype=np.float32)
        transcriber.stream_callback(frames, 2, None, None)

        # Queue should not have grown (data was dropped)
        assert transcriber.queue.size == size_before
|
|
|
|
|
|
class TestStopRecording:
    """Checks how ``RecordingTranscriber.stop_recording`` treats state and process."""

    @staticmethod
    def _make_transcriber():
        """Build a 16 kHz English Whisper.cpp transcriber backed by mock audio."""
        options = TranscriptionOptions(
            model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
            language="en",
            task=Task.TRANSCRIBE,
        )
        with patch("sounddevice.check_input_settings"):
            return RecordingTranscriber(
                transcription_options=options,
                input_device_index=0,
                sample_rate=16000,
                model_path="/fake/path",
                sounddevice=MockSoundDevice(),
            )

    @staticmethod
    def _process_with_poll(poll_result):
        """Return a mock process whose ``poll()`` reports ``poll_result``."""
        process = MagicMock()
        process.poll.return_value = poll_result
        return process

    def test_stop_recording_sets_is_running_false(self):
        """Stopping clears the ``is_running`` flag."""
        transcriber = self._make_transcriber()
        transcriber.is_running = True

        transcriber.stop_recording()

        assert transcriber.is_running is False

    def test_stop_recording_terminates_process(self):
        """A process that is still running is terminated and waited on for 5 s."""
        transcriber = self._make_transcriber()

        # poll() returning None means the process is running
        process = self._process_with_poll(None)
        transcriber.process = process

        transcriber.stop_recording()

        # Process should have been terminated and waited
        process.terminate.assert_called_once()
        process.wait.assert_called_once_with(timeout=5)

    def test_stop_recording_skips_terminated_process(self):
        """A process that already exited is neither terminated nor waited on."""
        transcriber = self._make_transcriber()

        # poll() returning 0 means the process already terminated
        process = self._process_with_poll(0)
        transcriber.process = process

        transcriber.stop_recording()

        # terminate and wait should not be called
        process.terminate.assert_not_called()
        process.wait.assert_not_called()
|
|
|
|
|
|
class TestStartLocalWhisperServer:
    """Tests ``start_local_whisper_server`` with ``subprocess.Popen`` mocked out."""

    @staticmethod
    def _make_options(language):
        """Return Whisper.cpp transcribe options for the given language."""
        return TranscriptionOptions(
            model=TranscriptionModel(model_type=ModelType.WHISPER_CPP),
            language=language,
            task=Task.TRANSCRIBE,
        )

    @staticmethod
    def _make_transcriber(options):
        """Construct a transcriber on device 0 with a fake model path and mock audio."""
        return RecordingTranscriber(
            transcription_options=options,
            input_device_index=0,
            sample_rate=16000,
            model_path="/fake/path",
            sounddevice=MockSoundDevice(),
        )

    @staticmethod
    def _fake_process(poll_result):
        """Return a mock process whose ``poll()`` reports ``poll_result``."""
        process = MagicMock()
        process.poll.return_value = poll_result
        return process

    @staticmethod
    def _reset(transcriber):
        """Clear running state and the process reference after a test."""
        transcriber.is_running = False
        transcriber.process = None

    def test_start_local_whisper_server_creates_openai_client(self):
        """A running server process yields an OpenAI client and a stored process."""
        options = self._make_options("en")

        with patch("sounddevice.check_input_settings"), \
                patch("subprocess.Popen") as mock_popen, \
                patch("time.sleep"):
            # poll() returning None means the process is running
            mock_popen.return_value = self._fake_process(None)

            transcriber = self._make_transcriber(options)

            try:
                transcriber.is_running = True
                transcriber.start_local_whisper_server()

                # Should have created an OpenAI client
                assert transcriber.openai_client is not None
                assert transcriber.process is not None
            finally:
                # Clean up to prevent QThread warnings
                self._reset(transcriber)

    def test_start_local_whisper_server_with_language(self):
        """An explicit language shows up in the launched command."""
        options = self._make_options("fr")

        with patch("sounddevice.check_input_settings"), \
                patch("subprocess.Popen") as mock_popen, \
                patch("time.sleep"):
            mock_popen.return_value = self._fake_process(None)

            transcriber = self._make_transcriber(options)

            try:
                transcriber.is_running = True
                transcriber.start_local_whisper_server()

                # Check that the language was passed to the command
                command = mock_popen.call_args[0][0]
                assert "--language" in command
                assert "fr" in command
            finally:
                self._reset(transcriber)

    def test_start_local_whisper_server_auto_language(self):
        """With no language set, the command asks for ``auto`` detection."""
        options = self._make_options(None)

        with patch("sounddevice.check_input_settings"), \
                patch("subprocess.Popen") as mock_popen, \
                patch("time.sleep"):
            mock_popen.return_value = self._fake_process(None)

            transcriber = self._make_transcriber(options)

            try:
                transcriber.is_running = True
                transcriber.start_local_whisper_server()

                # Check that auto language was used
                command = mock_popen.call_args[0][0]
                assert "--language" in command
                assert "auto" in command
            finally:
                self._reset(transcriber)

    def test_start_local_whisper_server_handles_failure(self):
        """A process that exits with an error leaves the client unset."""
        options = self._make_options("en")

        with patch("sounddevice.check_input_settings"), \
                patch("subprocess.Popen") as mock_popen, \
                patch("time.sleep"):
            # poll() returning 1 means the process terminated with an error
            failed_process = self._fake_process(1)
            failed_process.stderr.read.return_value = b"Error loading model"
            mock_popen.return_value = failed_process

            transcriber = self._make_transcriber(options)

            messages = []

            def collect(text):
                messages.append(text)

            transcriber.transcription.connect(collect)

            try:
                transcriber.is_running = True
                transcriber.start_local_whisper_server()

                # Should not have created a client when server failed
                assert transcriber.openai_client is None
                # Should have emitted starting and error messages
                assert len(messages) >= 1
                # First message should be about starting Whisper.cpp
                assert "Whisper" in messages[0]
            finally:
                self._reset(transcriber)
|