mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 22:55:46 +01:00
Remove delay combo box (#99)
This commit is contained in:
parent
04d309c03c
commit
c02a8b3afa
2 changed files with 58 additions and 75 deletions
48
gui.py
48
gui.py
|
|
@ -130,24 +130,6 @@ class QualityComboBox(QComboBox):
|
|||
self.quality_changed.emit(self.qualities[index])
|
||||
|
||||
|
||||
class DelaysComboBox(QComboBox):
    """Combo box listing the selectable delays.

    Each entry is labelled with the delay followed by an ``s`` suffix.
    ``delay_changed`` is emitted with the selected delay whenever the
    current index changes.
    """
    delay_changed = pyqtSignal(int)

    def __init__(self, default_delay: int, parent: Optional[QWidget], *args) -> None:
        super().__init__(parent, *args)
        self.delays = [5, 10, 20, 30]
        # Entries are added in the same order as self.delays so that a
        # combo box index can be used directly as an index into the list.
        self.addItems([self.label(value) for value in self.delays])
        self.currentIndexChanged.connect(self.on_index_changed)
        # Pre-select the entry whose label matches the requested default.
        self.setCurrentText(self.label(default_delay))

    def on_index_changed(self, index: int):
        """Emit ``delay_changed`` with the delay stored at ``index``."""
        chosen = self.delays[index]
        self.delay_changed.emit(chosen)

    def label(self, delay: int):
        """Return the display text for ``delay``, e.g. ``10`` -> ``"10s"``."""
        return "%ds" % delay
|
||||
|
||||
|
||||
class TextDisplayBox(QPlainTextEdit):
|
||||
"""TextDisplayBox is a read-only textbox"""
|
||||
|
||||
|
|
@ -260,18 +242,21 @@ class TranscriberWithSignal(QObject):
|
|||
|
||||
status_changed = pyqtSignal(Status)
|
||||
|
||||
def __init__(
    self, model: whisper.Whisper, language: Optional[str],
    task: Task, parent: Optional[QWidget], input_device_index: Optional[int],
    *args,
) -> None:
    """Create the wrapped ``RecordingTranscriber``.

    ``parent`` and ``*args`` are forwarded to the base class constructor;
    the remaining arguments are forwarded to ``RecordingTranscriber``,
    with ``on_next_status`` registered as its status callback.
    """
    super().__init__(parent, *args)
    recorder = RecordingTranscriber(
        model=model,
        language=language,
        status_callback=self.on_next_status,
        task=task,
        input_device_index=input_device_index,
    )
    self.transcriber = recorder
|
||||
|
||||
def start_recording(self):
    """Start recording by delegating to the wrapped transcriber."""
    recorder = self.transcriber
    recorder.start_recording()
|
||||
|
||||
def on_next_status(self, status: Status):
    """Re-emit a status update from the transcriber as ``status_changed``."""
    signal = self.status_changed
    signal.emit(status)
|
||||
|
||||
|
|
@ -471,7 +456,6 @@ class RecordingTranscriberWidget(QWidget):
|
|||
selected_quality = Quality.LOW
|
||||
selected_language: Optional[str] = None
|
||||
selected_device_id: Optional[int]
|
||||
selected_delay = 10
|
||||
selected_task = Task.TRANSCRIBE
|
||||
model_download_progress_dialog: Optional[DownloadModelProgressDialog] = None
|
||||
|
||||
|
|
@ -501,10 +485,6 @@ class RecordingTranscriberWidget(QWidget):
|
|||
parent=self)
|
||||
self.tasks_combo_box.taskChanged.connect(self.on_task_changed)
|
||||
|
||||
delays_combo_box = DelaysComboBox(
|
||||
default_delay=self.selected_delay, parent=self)
|
||||
delays_combo_box.delay_changed.connect(self.on_delay_changed)
|
||||
|
||||
self.timer_label = TimerLabel(self)
|
||||
|
||||
self.record_button = RecordButton(self)
|
||||
|
|
@ -519,7 +499,6 @@ class RecordingTranscriberWidget(QWidget):
|
|||
((0, 5, FormLabel('Quality:', self)), (5, 7, self.quality_combo_box)),
|
||||
((0, 5, FormLabel('Microphone:', self)),
|
||||
(5, 7, self.audio_devices_combo_box)),
|
||||
((0, 5, FormLabel('Delay:', self)), (5, 7, delays_combo_box)),
|
||||
((6, 3, self.timer_label), (9, 3, self.record_button)),
|
||||
((0, 12, self.text_box),),
|
||||
)
|
||||
|
|
@ -559,9 +538,6 @@ class RecordingTranscriberWidget(QWidget):
|
|||
def on_task_changed(self, task: Task):
    """Store the task reported by the tasks combo box's ``taskChanged`` signal."""
    self.selected_task = task
|
||||
|
||||
def on_delay_changed(self, delay: int):
    """Store the delay reported by the delays combo box's ``delay_changed`` signal."""
    self.selected_delay = delay
|
||||
|
||||
def start_recording(self):
|
||||
self.record_button.setDisabled(True)
|
||||
|
||||
|
|
@ -589,14 +565,12 @@ class RecordingTranscriberWidget(QWidget):
|
|||
model=model,
|
||||
language=self.selected_language,
|
||||
task=self.selected_task,
|
||||
input_device_index=self.selected_device_id,
|
||||
parent=self
|
||||
)
|
||||
self.transcriber.status_changed.connect(
|
||||
self.on_transcriber_status_changed)
|
||||
self.transcriber.start_recording(
|
||||
input_device_index=self.selected_device_id,
|
||||
block_duration=self.selected_delay,
|
||||
)
|
||||
self.transcriber.start_recording()
|
||||
|
||||
def on_download_model_progress(self, current_size: int, total_size: int):
|
||||
if current_size == total_size:
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@ import enum
|
|||
import logging
|
||||
import os
|
||||
import platform
|
||||
import queue
|
||||
import subprocess
|
||||
from threading import Thread
|
||||
from threading import Lock, Thread
|
||||
from typing import Callable, Optional
|
||||
|
||||
import numpy as np
|
||||
|
|
@ -14,10 +13,6 @@ import whisper
|
|||
|
||||
import _whisper
|
||||
|
||||
# When the app is opened as a .app from Finder, the path doesn't contain /usr/local/bin
|
||||
# which breaks the call to run `ffmpeg`. This sets the path manually to fix that.
|
||||
os.environ["PATH"] += os.pathsep + "/usr/local/bin"
|
||||
|
||||
|
||||
class State(enum.Enum):
|
||||
STARTING_NEXT_TRANSCRIPTION = 0
|
||||
|
|
@ -41,28 +36,32 @@ class RecordingTranscriber:
|
|||
current_thread: Optional[Thread]
|
||||
current_stream: Optional[sounddevice.InputStream]
|
||||
is_running = False
|
||||
MAX_QUEUE_SIZE = 10
|
||||
|
||||
def __init__(self, model: whisper.Whisper, language: Optional[str],
             status_callback: Callable[[Status], None], task: Task,
             input_device_index: Optional[int] = None) -> None:
    """Set up the state needed to record audio and transcribe it in batches.

    Args:
        model: Whisper model whose ``transcribe`` method is run on each batch.
        language: Language passed through to ``model.transcribe``.
        status_callback: Called with a ``Status`` before and after each
            batch is transcribed.
        task: Task whose ``value`` is passed through to ``model.transcribe``.
        input_device_index: Device handed to ``sounddevice.InputStream`` and
            used to look up the sample rate.
    """
    self.model = model
    self.current_stream = None
    self.status_callback = status_callback
    self.language = language
    self.task = task
    self.input_device_index = input_device_index
    self.sample_rate = self.get_device_sample_rate(
        device_id=input_device_index)
    self.n_batch_samples = 5 * self.sample_rate  # every 5 seconds
    # pause queueing if more than 3 batches behind
    self.max_queue_size = 3 * self.n_batch_samples
    # Start from an empty 1-D buffer. np.ndarray([], ...) would treat []
    # as the *shape* and allocate a 0-d array holding one uninitialised
    # sample, which would then be prepended to the recorded audio.
    self.queue = np.array([], dtype=np.float32)
    # Guards self.queue, which is appended to by stream_callback and
    # consumed by process_queue.
    self.mutex = Lock()
    # Accumulated transcript; passed to the model as the initial prompt.
    self.text = ''
|
||||
|
||||
def start_recording(self, block_duration=10, input_device_index: Optional[int] = None):
|
||||
sample_rate = self.get_device_sample_rate(device_id=input_device_index)
|
||||
|
||||
logging.debug("Recording... language: \"%s\", model: \"%s\", task: \"%s\", device: \"%s\", block duration: \"%s\", sample rate: \"%s\"" %
|
||||
(self.language, self.model._get_name(), self.task, input_device_index, block_duration, sample_rate))
|
||||
def start_recording(self):
|
||||
logging.debug(
|
||||
f'Recording, language = {self.language}, task = {self.task}, device = {self.input_device_index}, sample rate = {self.sample_rate}')
|
||||
self.current_stream = sounddevice.InputStream(
|
||||
samplerate=sample_rate,
|
||||
blocksize=block_duration * sample_rate,
|
||||
device=input_device_index, dtype="float32",
|
||||
samplerate=self.sample_rate,
|
||||
blocksize=1 * self.sample_rate, # 1 sec
|
||||
device=self.input_device_index, dtype="float32",
|
||||
channels=1, callback=self.stream_callback)
|
||||
self.current_stream.start()
|
||||
|
||||
|
|
@ -73,20 +72,31 @@ class RecordingTranscriber:
|
|||
|
||||
def process_queue(self):
    """Transcribe buffered audio one batch at a time while recording runs.

    Loops for as long as ``self.is_running`` is true. Whenever at least
    ``self.n_batch_samples`` samples are buffered, the oldest batch is
    removed from ``self.queue`` and transcribed, and ``self.status_callback``
    is notified before and after. The text of each batch is appended to
    ``self.text``, which is passed back to the model as the initial prompt
    for the next batch.
    """
    import time  # local import: only the idle wait below needs it

    while self.is_running:
        batch = None
        # Hold the lock only while slicing the buffer so the audio callback
        # is never blocked for the duration of a transcription. The `with`
        # block also guarantees the lock is released if slicing raises.
        with self.mutex:
            if self.queue.size >= self.n_batch_samples:
                batch = self.queue[:self.n_batch_samples]
                self.queue = self.queue[self.n_batch_samples:]
            remaining = self.queue.size

        if batch is None:
            # Not enough audio buffered yet. Sleep briefly instead of
            # spinning, which would burn CPU and keep re-taking the lock
            # the audio callback needs.
            time.sleep(0.1)
            continue

        logging.debug(
            f'Processing next frame, samples = {batch.size}, total samples = {remaining}, amplitude = {self.amplitude(batch)}')
        self.status_callback(
            Status(State.STARTING_NEXT_TRANSCRIPTION))
        time_started = datetime.datetime.now()

        result = self.model.transcribe(
            audio=batch, language=self.language, task=self.task.value,
            initial_prompt=self.text)  # prompt model with text from previous transcriptions
        batch_text: str = result.get('text')

        logging.debug(
            f'Received next result, length = {len(batch_text)}, time taken = {datetime.datetime.now() - time_started}')
        self.status_callback(
            Status(State.FINISHED_CURRENT_TRANSCRIPTION, batch_text))

        self.text += f'\n\n{batch_text}'
|
||||
|
||||
def get_device_sample_rate(self, device_id: Optional[int]) -> int:
|
||||
"""Returns the sample rate to be used for recording. It uses the default sample rate
|
||||
|
|
@ -106,13 +116,13 @@ class RecordingTranscriber:
|
|||
|
||||
def stream_callback(self, in_data, frame_count, time_info, status):
    """Buffer one chunk of recorded audio for ``process_queue``.

    Used as the ``callback`` of the ``sounddevice.InputStream`` created in
    ``start_recording``.

    Args:
        in_data: Recorded samples; flattened before being buffered.
        frame_count: Unused.
        time_info: Unused.
        status: Status reported by the stream; logged when truthy so that
            problems it signals are not silently lost.
    """
    if status:
        logging.warning(f'Recording stream status: {status}')

    chunk: np.ndarray = in_data.ravel()
    with self.mutex:
        # Try to enqueue the next chunk. If the buffer is already full, drop the chunk.
        accepted = self.queue.size < self.max_queue_size
        if accepted:
            self.queue = np.append(self.queue, chunk)

    # Log outside the lock so the consumer thread is not held up by logging.
    if not accepted:
        logging.debug(
            f'Dropped chunk of {chunk.size} samples, queue is full')
|
||||
|
||||
def amplitude(self, arr: np.ndarray):
    """Return the mean of the absolute maximum and absolute minimum of ``arr``.

    Args:
        arr: Samples to measure. Any array-like is accepted.

    Returns:
        ``(abs(max) + abs(min)) / 2``, or ``0.0`` when ``arr`` is empty.
    """
    samples = np.asarray(arr)
    if samples.size == 0:
        # An empty batch has no extrema; report silence instead of raising.
        return 0.0
    # ndarray.max/min run in native code and work for any number of
    # dimensions, unlike the builtin max/min which iterate in Python.
    return (abs(samples.max()) + abs(samples.min())) / 2
|
||||
|
||||
def stop_recording(self):
|
||||
if self.current_stream != None:
|
||||
|
|
@ -120,7 +130,6 @@ class RecordingTranscriber:
|
|||
logging.debug('Closed recording stream')
|
||||
|
||||
self.is_running = False
|
||||
self.queue.queue.clear()
|
||||
|
||||
if self.current_thread != None:
|
||||
logging.debug('Waiting for processing thread to terminate')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue