Remove delay combo box (#99)

This commit is contained in:
Chidi Williams 2022-10-19 23:29:36 +01:00 committed by GitHub
commit c02a8b3afa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 58 additions and 75 deletions

48
gui.py
View file

@ -130,24 +130,6 @@ class QualityComboBox(QComboBox):
self.quality_changed.emit(self.qualities[index])
class DelaysComboBox(QComboBox):
    """Combo box offering a fixed list of delays, each shown as '<n>s'.

    Emits ``delay_changed`` with the integer delay whenever the current
    index changes.
    """
    delay_changed = pyqtSignal(int)

    def __init__(self, default_delay: int, parent: Optional[QWidget], *args) -> None:
        super().__init__(parent, *args)
        self.delays = [5, 10, 20, 30]
        item_labels = [self.label(value) for value in self.delays]
        self.addItems(item_labels)
        # The signal is connected before the default is selected, so picking
        # the default below already goes through on_index_changed.
        self.currentIndexChanged.connect(self.on_index_changed)
        default_label = self.label(default_delay)
        self.setCurrentText(default_label)

    def on_index_changed(self, index: int):
        """Translate the new combo-box index into its delay and emit it."""
        chosen_delay = self.delays[index]
        self.delay_changed.emit(chosen_delay)

    def label(self, delay: int):
        """Return the display text for *delay*."""
        return "%ds" % delay
class TextDisplayBox(QPlainTextEdit):
"""TextDisplayBox is a read-only textbox"""
@ -260,18 +242,21 @@ class TranscriberWithSignal(QObject):
status_changed = pyqtSignal(Status)
def __init__(self, model: whisper.Whisper, language: Optional[str], task: Task, parent: Optional[QWidget], *args) -> None:
def __init__(
self, model: whisper.Whisper, language: Optional[str],
task: Task, parent: Optional[QWidget], input_device_index: Optional[int],
*args,
) -> None:
super().__init__(parent, *args)
self.transcriber = RecordingTranscriber(
model=model, language=language,
status_callback=self.on_next_status, task=task)
def start_recording(self, input_device_index: Optional[int], block_duration: int):
self.transcriber.start_recording(
status_callback=self.on_next_status, task=task,
input_device_index=input_device_index,
block_duration=block_duration,
)
def start_recording(self):
self.transcriber.start_recording()
def on_next_status(self, status: Status):
    """Re-emit a status received from the transcriber on ``status_changed``."""
    self.status_changed.emit(status)
@ -471,7 +456,6 @@ class RecordingTranscriberWidget(QWidget):
selected_quality = Quality.LOW
selected_language: Optional[str] = None
selected_device_id: Optional[int]
selected_delay = 10
selected_task = Task.TRANSCRIBE
model_download_progress_dialog: Optional[DownloadModelProgressDialog] = None
@ -501,10 +485,6 @@ class RecordingTranscriberWidget(QWidget):
parent=self)
self.tasks_combo_box.taskChanged.connect(self.on_task_changed)
delays_combo_box = DelaysComboBox(
default_delay=self.selected_delay, parent=self)
delays_combo_box.delay_changed.connect(self.on_delay_changed)
self.timer_label = TimerLabel(self)
self.record_button = RecordButton(self)
@ -519,7 +499,6 @@ class RecordingTranscriberWidget(QWidget):
((0, 5, FormLabel('Quality:', self)), (5, 7, self.quality_combo_box)),
((0, 5, FormLabel('Microphone:', self)),
(5, 7, self.audio_devices_combo_box)),
((0, 5, FormLabel('Delay:', self)), (5, 7, delays_combo_box)),
((6, 3, self.timer_label), (9, 3, self.record_button)),
((0, 12, self.text_box),),
)
@ -559,9 +538,6 @@ class RecordingTranscriberWidget(QWidget):
def on_task_changed(self, task: Task):
    """Remember the task most recently chosen in the tasks combo box."""
    self.selected_task = task
def on_delay_changed(self, delay: int):
    """Remember the delay most recently reported by the delays combo box."""
    self.selected_delay = delay
def start_recording(self):
self.record_button.setDisabled(True)
@ -589,14 +565,12 @@ class RecordingTranscriberWidget(QWidget):
model=model,
language=self.selected_language,
task=self.selected_task,
input_device_index=self.selected_device_id,
parent=self
)
self.transcriber.status_changed.connect(
self.on_transcriber_status_changed)
self.transcriber.start_recording(
input_device_index=self.selected_device_id,
block_duration=self.selected_delay,
)
self.transcriber.start_recording()
def on_download_model_progress(self, current_size: int, total_size: int):
if current_size == total_size:

View file

@ -3,9 +3,8 @@ import enum
import logging
import os
import platform
import queue
import subprocess
from threading import Thread
from threading import Lock, Thread
from typing import Callable, Optional
import numpy as np
@ -14,10 +13,6 @@ import whisper
import _whisper
# When the app is opened as a .app from Finder, the path doesn't contain /usr/local/bin
# which breaks the call to run `ffmpeg`. This sets the path manually to fix that.
os.environ["PATH"] += os.pathsep + "/usr/local/bin"
class State(enum.Enum):
STARTING_NEXT_TRANSCRIPTION = 0
@ -41,28 +36,32 @@ class RecordingTranscriber:
current_thread: Optional[Thread]
current_stream: Optional[sounddevice.InputStream]
is_running = False
MAX_QUEUE_SIZE = 10
def __init__(self, model: whisper.Whisper, language: Optional[str],
             status_callback: Callable[[Status], None], task: Task,
             input_device_index: Optional[int] = None) -> None:
    """Set up a recording transcriber bound to one input device.

    Args:
        model: Whisper model whose ``transcribe`` is called for each batch.
        language: Language passed through to the model; may be None.
        status_callback: Callable invoked with ``Status`` updates.
        task: Task whose ``value`` is passed through to the model.
        input_device_index: Device to record from. None is forwarded
            unchanged to ``get_device_sample_rate`` and the input stream.
    """
    self.model = model
    self.current_stream = None
    self.status_callback = status_callback
    self.language = language
    self.task = task
    self.input_device_index = input_device_index
    self.sample_rate = self.get_device_sample_rate(
        device_id=input_device_index)
    self.n_batch_samples = 5 * self.sample_rate  # every 5 seconds
    # pause queueing if more than 3 batches behind
    self.max_queue_size = 3 * self.n_batch_samples
    # np.array([]) is an empty 1-D buffer (size 0). np.ndarray([]) would
    # instead allocate a 0-d array holding one uninitialized sample, which
    # np.append would then carry into the first batch.
    self.queue = np.array([], dtype=np.float32)
    self.mutex = Lock()
    # Accumulated transcript so far.
    self.text = ''
def start_recording(self, block_duration=10, input_device_index: Optional[int] = None):
sample_rate = self.get_device_sample_rate(device_id=input_device_index)
logging.debug("Recording... language: \"%s\", model: \"%s\", task: \"%s\", device: \"%s\", block duration: \"%s\", sample rate: \"%s\"" %
(self.language, self.model._get_name(), self.task, input_device_index, block_duration, sample_rate))
def start_recording(self):
logging.debug(
f'Recording, language = {self.language}, task = {self.task}, device = {self.input_device_index}, sample rate = {self.sample_rate}')
self.current_stream = sounddevice.InputStream(
samplerate=sample_rate,
blocksize=block_duration * sample_rate,
device=input_device_index, dtype="float32",
samplerate=self.sample_rate,
blocksize=1 * self.sample_rate, # 1 sec
device=self.input_device_index, dtype="float32",
channels=1, callback=self.stream_callback)
self.current_stream.start()
@ -73,20 +72,31 @@ class RecordingTranscriber:
def process_queue(self):
    """Transcribe buffered audio one batch at a time while ``is_running``.

    Each iteration takes ``n_batch_samples`` samples off the front of the
    buffer (if that many are available), reports
    ``STARTING_NEXT_TRANSCRIPTION``, runs the model with the text
    transcribed so far as the initial prompt, reports
    ``FINISHED_CURRENT_TRANSCRIPTION`` with the new text, and appends that
    text to ``self.text``.
    """
    # Local import: only needed for the idle back-off below.
    import time

    while self.is_running:
        # Hold the lock only while slicing the buffer; the context manager
        # guarantees release on every path.
        with self.mutex:
            if self.queue.size >= self.n_batch_samples:
                batch = self.queue[:self.n_batch_samples]
                self.queue = self.queue[self.n_batch_samples:]
                remaining = self.queue.size
            else:
                batch = None

        if batch is None:
            # Not enough audio yet. Back off briefly instead of spinning
            # on the mutex and starving the audio callback.
            time.sleep(0.05)
            continue

        logging.debug(
            f'Processing next frame, samples = {batch.size}, total samples = {remaining}, amplitude = {self.amplitude(batch)}')
        self.status_callback(
            Status(State.STARTING_NEXT_TRANSCRIPTION))
        # NOTE(review): relies on a module-level `import datetime`, which is
        # not visible in this part of the file -- confirm it exists.
        time_started = datetime.datetime.now()

        result = self.model.transcribe(
            audio=batch, language=self.language, task=self.task.value,
            initial_prompt=self.text)  # prompt model with text from previous transcriptions

        batch_text: str = result.get('text')

        logging.debug(
            f'Received next result, length = {len(batch_text)}, time taken = {datetime.datetime.now() - time_started}')
        self.status_callback(
            Status(State.FINISHED_CURRENT_TRANSCRIPTION, batch_text))

        self.text += f'\n\n{batch_text}'
def get_device_sample_rate(self, device_id: Optional[int]) -> int:
"""Returns the sample rate to be used for recording. It uses the default sample rate
@ -106,13 +116,13 @@ class RecordingTranscriber:
def stream_callback(self, in_data, frame_count, time_info, status):
    """Append one incoming audio block to the sample buffer.

    The block is flattened and appended under ``self.mutex``. It is dropped
    when the buffer already holds ``max_queue_size`` samples or more.
    """
    # Try to enqueue the next block. If the queue is already full, drop the block.
    samples: np.ndarray = in_data.ravel()
    with self.mutex:
        if self.queue.size >= self.max_queue_size:
            return
        self.queue = np.append(self.queue, samples)
def amplitude(self, arr: np.ndarray):
    """Return the mean of the absolute maximum and absolute minimum of *arr*.

    Args:
        arr: Array of samples.

    Returns:
        ``(abs(max) + abs(min)) / 2``, or 0.0 when *arr* is empty.
    """
    samples = np.asarray(arr)
    if samples.size == 0:
        # An empty array has no extrema; report silence rather than raise.
        return 0.0
    # ndarray.max()/min() reduce in native code, whereas the builtin
    # max()/min() iterate the array element by element in Python.
    return (abs(samples.max()) + abs(samples.min())) / 2
def stop_recording(self):
if self.current_stream != None:
@ -120,7 +130,6 @@ class RecordingTranscriber:
logging.debug('Closed recording stream')
self.is_running = False
self.queue.queue.clear()
if self.current_thread != None:
logging.debug('Waiting for processing thread to terminate')