Remove delay combo box (#99)

2026-03-14 22:55:46 +01:00 · 2022-10-19 23:29:36 +01:00 · 2022-10-19 23:29:36 +01:00 · c02a8b3afa
commit c02a8b3afa
parent 04d309c03c
2 changed files with 58 additions and 75 deletions
--- a/gui.py
+++ b/gui.py
@ -130,24 +130,6 @@ class QualityComboBox(QComboBox):
        self.quality_changed.emit(self.qualities[index])


-class DelaysComboBox(QComboBox):
-    """DelaysComboBox displays the list of available delays"""
-    delay_changed = pyqtSignal(int)
-
-    def __init__(self, default_delay: int, parent: Optional[QWidget], *args) -> None:
-        super().__init__(parent, *args)
-        self.delays = [5, 10, 20, 30]
-        self.addItems(map(self.label, self.delays))
-        self.currentIndexChanged.connect(self.on_index_changed)
-        self.setCurrentText(self.label(default_delay))
-
-    def on_index_changed(self, index: int):
-        self.delay_changed.emit(self.delays[index])
-
-    def label(self, delay: int):
-        return "%ds" % delay
-
-
 class TextDisplayBox(QPlainTextEdit):
    """TextDisplayBox is a read-only textbox"""

@ -260,18 +242,21 @@ class TranscriberWithSignal(QObject):

    status_changed = pyqtSignal(Status)

-    def __init__(self, model: whisper.Whisper, language: Optional[str], task: Task, parent: Optional[QWidget], *args) -> None:
+    def __init__(
+            self, model: whisper.Whisper, language: Optional[str],
+            task: Task, parent: Optional[QWidget], input_device_index: Optional[int],
+            *args,
+    ) -> None:
        super().__init__(parent, *args)
        self.transcriber = RecordingTranscriber(
            model=model, language=language,
-            status_callback=self.on_next_status, task=task)
-
-    def start_recording(self, input_device_index: Optional[int], block_duration: int):
-        self.transcriber.start_recording(
+            status_callback=self.on_next_status, task=task,
            input_device_index=input_device_index,
-            block_duration=block_duration,
        )

+    def start_recording(self):
+        self.transcriber.start_recording()
+
    def on_next_status(self, status: Status):
        self.status_changed.emit(status)

@ -471,7 +456,6 @@ class RecordingTranscriberWidget(QWidget):
    selected_quality = Quality.LOW
    selected_language: Optional[str] = None
    selected_device_id: Optional[int]
-    selected_delay = 10
    selected_task = Task.TRANSCRIBE
    model_download_progress_dialog: Optional[DownloadModelProgressDialog] = None

@ -501,10 +485,6 @@ class RecordingTranscriberWidget(QWidget):
            parent=self)
        self.tasks_combo_box.taskChanged.connect(self.on_task_changed)

-        delays_combo_box = DelaysComboBox(
-            default_delay=self.selected_delay, parent=self)
-        delays_combo_box.delay_changed.connect(self.on_delay_changed)
-
        self.timer_label = TimerLabel(self)

        self.record_button = RecordButton(self)
@ -519,7 +499,6 @@ class RecordingTranscriberWidget(QWidget):
            ((0, 5, FormLabel('Quality:', self)), (5, 7, self.quality_combo_box)),
            ((0, 5, FormLabel('Microphone:', self)),
             (5, 7, self.audio_devices_combo_box)),
-            ((0, 5, FormLabel('Delay:', self)), (5, 7, delays_combo_box)),
            ((6, 3, self.timer_label), (9, 3, self.record_button)),
            ((0, 12, self.text_box),),
        )
@ -559,9 +538,6 @@ class RecordingTranscriberWidget(QWidget):
    def on_task_changed(self, task: Task):
        self.selected_task = task

-    def on_delay_changed(self, delay: int):
-        self.selected_delay = delay
-
    def start_recording(self):
        self.record_button.setDisabled(True)

@ -589,14 +565,12 @@ class RecordingTranscriberWidget(QWidget):
            model=model,
            language=self.selected_language,
            task=self.selected_task,
+            input_device_index=self.selected_device_id,
            parent=self
        )
        self.transcriber.status_changed.connect(
            self.on_transcriber_status_changed)
-        self.transcriber.start_recording(
-            input_device_index=self.selected_device_id,
-            block_duration=self.selected_delay,
-        )
+        self.transcriber.start_recording()

    def on_download_model_progress(self, current_size: int, total_size: int):
        if current_size == total_size:
--- a/transcriber.py
+++ b/transcriber.py
@ -3,9 +3,8 @@ import enum
 import logging
 import os
 import platform
-import queue
 import subprocess
-from threading import Thread
+from threading import Lock, Thread
 from typing import Callable, Optional

 import numpy as np
@ -14,10 +13,6 @@ import whisper

 import _whisper

-# When the app is opened as a .app from Finder, the path doesn't contain /usr/local/bin
-# which breaks the call to run `ffmpeg`. This sets the path manually to fix that.
-os.environ["PATH"] += os.pathsep + "/usr/local/bin"
-

 class State(enum.Enum):
    STARTING_NEXT_TRANSCRIPTION = 0
@ -41,28 +36,32 @@ class RecordingTranscriber:
    current_thread: Optional[Thread]
    current_stream: Optional[sounddevice.InputStream]
    is_running = False
-    MAX_QUEUE_SIZE = 10

    def __init__(self, model: whisper.Whisper, language: Optional[str],
-                 status_callback: Callable[[Status], None], task: Task) -> None:
+                 status_callback: Callable[[Status], None], task: Task,
+                 input_device_index: Optional[int] = None) -> None:
        self.model = model
        self.current_stream = None
        self.status_callback = status_callback
        self.language = language
        self.task = task
-        self.queue: queue.Queue[np.ndarray] = queue.Queue(
-            RecordingTranscriber.MAX_QUEUE_SIZE,
-        )
+        self.input_device_index = input_device_index
+        self.sample_rate = self.get_device_sample_rate(
+            device_id=input_device_index)
+        self.n_batch_samples = 5 * self.sample_rate  # every 5 seconds
+        # pause queueing if more than 3 batches behind
+        self.max_queue_size = 3 * self.n_batch_samples
+        self.queue = np.ndarray([], dtype=np.float32)
+        self.mutex = Lock()
+        self.text = ''

-    def start_recording(self, block_duration=10, input_device_index: Optional[int] = None):
-        sample_rate = self.get_device_sample_rate(device_id=input_device_index)
-
-        logging.debug("Recording... language: \"%s\", model: \"%s\", task: \"%s\", device: \"%s\", block duration: \"%s\", sample rate: \"%s\"" %
-                      (self.language, self.model._get_name(), self.task, input_device_index, block_duration, sample_rate))
+    def start_recording(self):
+        logging.debug(
+            f'Recording, language = {self.language}, task = {self.task}, device = {self.input_device_index}, sample rate = {self.sample_rate}')
        self.current_stream = sounddevice.InputStream(
-            samplerate=sample_rate,
-            blocksize=block_duration * sample_rate,
-            device=input_device_index, dtype="float32",
+            samplerate=self.sample_rate,
+            blocksize=1 * self.sample_rate,  # 1 sec
+            device=self.input_device_index, dtype="float32",
            channels=1, callback=self.stream_callback)
        self.current_stream.start()

@ -73,20 +72,31 @@ class RecordingTranscriber:

    def process_queue(self):
        while self.is_running:
-            try:
-                block = self.queue.get(block=False)
+            self.mutex.acquire()
+            if self.queue.size >= self.n_batch_samples:
+                batch = self.queue[:self.n_batch_samples]
+                self.queue = self.queue[self.n_batch_samples:]
+                self.mutex.release()
+
                logging.debug(
-                    'Processing next frame. Current queue size: %d' % self.queue.qsize())
-                self.status_callback(Status(State.STARTING_NEXT_TRANSCRIPTION))
-                result = self.model.transcribe(
-                    audio=block, language=self.language, task=self.task.value)
-                text = result.get("text")
-                logging.debug(
-                    "Received next result of length: %s" % len(text))
+                    f'Processing next frame, samples = {batch.size}, total samples = {self.queue.size}, amplitude = {self.amplitude(batch)}')
                self.status_callback(
-                    Status(State.FINISHED_CURRENT_TRANSCRIPTION, text))
-            except queue.Empty:
-                continue
+                    Status(State.STARTING_NEXT_TRANSCRIPTION))
+                time_started = datetime.datetime.now()
+
+                result = self.model.transcribe(
+                    audio=batch, language=self.language, task=self.task.value,
+                    initial_prompt=self.text)  # prompt model with text from previous transcriptions
+                batch_text: str = result.get('text')
+
+                logging.debug(
+                    f'Received next result, length = {len(batch_text)}, time taken = {datetime.datetime.now() - time_started}')
+                self.status_callback(
+                    Status(State.FINISHED_CURRENT_TRANSCRIPTION, batch_text))
+
+                self.text += f'\n\n{batch_text}'
+            else:
+                self.mutex.release()

    def get_device_sample_rate(self, device_id: Optional[int]) -> int:
        """Returns the sample rate to be used for recording. It uses the default sample rate
@ -106,13 +116,13 @@ class RecordingTranscriber:

    def stream_callback(self, in_data, frame_count, time_info, status):
        # Try to enqueue the next block. If the queue is already full, drop the block.
-        try:
-            chunk = in_data.ravel()
-            logging.debug('Received next chunk: length %s, amplitude %s, status "%s"'
-                          % (len(chunk), (abs(max(chunk)) + abs(min(chunk))) / 2, status))
-            self.queue.put(chunk, block=False)
-        except queue.Full:
-            return
+        chunk: np.ndarray = in_data.ravel()
+        with self.mutex:
+            if self.queue.size < self.max_queue_size:
+                self.queue = np.append(self.queue, chunk)
+
+    def amplitude(self, arr: np.ndarray):
+        return (abs(max(arr)) + abs(min(arr))) / 2

    def stop_recording(self):
        if self.current_stream != None:
@ -120,7 +130,6 @@ class RecordingTranscriber:
            logging.debug('Closed recording stream')

        self.is_running = False
-        self.queue.queue.clear()

        if self.current_thread != None:
            logging.debug('Waiting for processing thread to terminate')