Fix styling

2026-03-14 22:55:46 +01:00 · 2022-09-26 02:44:27 +01:00 · 2022-09-26 02:44:27 +01:00 · 655975917e
commit 655975917e
parent 0622ffb7d8
3 changed files with 62 additions and 29 deletions
--- a/README.md
+++ b/README.md
@@ -2,9 +2,9 @@

 Buzz transcribes audio from your computer's microphones to text using OpenAI's [Whisper](https://github.com/openai/whisper).

-## Setup
+## Requirements

-Whisper [requires ffmpeg to be installed on your computer](https://github.com/openai/whisper#setup):
+To set up Buzz, first install ffmpeg ([needed to run Whisper](https://github.com/openai/whisper#setup)).

 ```text
 # on Ubuntu or Debian
--- a/gui.py
+++ b/gui.py
@@ -2,13 +2,21 @@ import enum
 from typing import List, Tuple

 import pyaudio
+import whisper
 from PyQt5.QtCore import *
 from PyQt5.QtGui import *
 from PyQt5.QtWidgets import *
+from whisper import tokenizer

 from transcriber import Transcriber


+class Label(QLabel):  # QLabel whose text color is preset to #ddd via a stylesheet
+    def __init__(self, name: str,  *args) -> None:
+        super().__init__(name, *args)
+        self.setStyleSheet('QLabel { color: #ddd }')
+
+
 class AudioDevicesComboBox(QComboBox):
    """AudioDevicesComboBox is a combo box for selecting audio devices."""
    deviceChanged = pyqtSignal(int)
@@ -36,6 +44,36 @@ class AudioDevicesComboBox(QComboBox):
        self.deviceChanged.emit(self.audio_devices[index][0])


+class LanguagesComboBox(QComboBox):
+    """LanguagesComboBox lists 'Detect language' plus Whisper's languages; emits the chosen code."""
+    languageChanged = pyqtSignal(str)
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+        self.languages = {'none': 'Detect language', **tokenizer.LANGUAGES}
+        self.addItems(map(lambda lang: lang.title(), self.languages.values()))
+        self.currentIndexChanged.connect(self.on_index_changed)
+
+    def on_index_changed(self, index: int):
+        key = list(self.languages.keys())[index]  # language code (dict key), not the display name
+        self.languageChanged.emit(key if key != 'none' else None)
+
+
+class ModelsComboBox(QComboBox):
+    """ModelsComboBox displays the list of available Whisper models for selection
+    and emits modelChanged with the selected entry of whisper.available_models()."""
+    modelChanged = pyqtSignal(str)
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+        self.models = whisper.available_models()
+        self.addItems(self.models)
+        self.currentIndexChanged.connect(self.on_index_changed)
+
+    def on_index_changed(self, index: int):
+        self.modelChanged.emit(self.models[index])  # items were added in self.models order
+
+
 class RecordButton(QPushButton):
    class Status(enum.Enum):
        RECORDING = enum.auto()
@@ -99,7 +137,6 @@ class Application(QApplication):
    def __init__(self) -> None:
        super().__init__([])

-        self.setStyle("fusion")
        self.setStyleSheet("""QComboBox {
            color: #eee;
        }""")
@@ -107,7 +144,8 @@ class Application(QApplication):
        self.window = QWidget()
        self.window.setFixedSize(400, 400)

-        layout = QVBoxLayout()
+        layout = QGridLayout()
+        self.window.setLayout(layout)

        self.audio_devices_combo_box = AudioDevicesComboBox()
        self.audio_devices_combo_box.deviceChanged.connect(
@@ -116,38 +154,29 @@ class Application(QApplication):
        self.record_button = RecordButton()
        self.record_button.statusChanged.connect(self.on_status_changed)

-        self.text_box = self.get_text_box()
+        self.text_box = self.text_box()

-        layout.addLayout(self.get_audio_devices_row(
-            self.audio_devices_combo_box))
-        layout.addLayout(self.get_button_row(self.record_button))
-        layout.addWidget(self.text_box)
+        layout.addWidget(Label('Model:'), 0, 0, 1, 3)
+        layout.addWidget(ModelsComboBox(), 0, 3, 1, 9)
+
+        layout.addWidget(Label('Language:'), 1, 0, 1, 3)
+        layout.addWidget(LanguagesComboBox(), 1, 3, 1, 9)
+
+        layout.addWidget(Label('Microphone:'), 2, 0, 1, 3)
+        layout.addWidget(self.audio_devices_combo_box, 2, 3, 1, 9)
+
+        layout.addWidget(self.record_button, 3, 9, 1, 3)
+
+        layout.addWidget(self.text_box, 4, 0, 1, 12)

-        self.window.setLayout(layout)
        self.window.show()

-    def get_audio_devices_row(self, audio_devices_combo_box: AudioDevicesComboBox):
-        row = QHBoxLayout()
-
-        label = QLabel()
-        label.setText('Select microphone:')
-        label.setStyleSheet('QLabel { color: #ddd }')
-
-        row.addWidget(label)
-        row.addWidget(audio_devices_combo_box)
-        row.addStretch(1)
-        return row
-
-    def get_button_row(self, record_button: RecordButton):
-        row = QHBoxLayout()
-        row.addWidget(record_button)
-        row.addStretch(1)
-        return row
-
-    def get_text_box(self):
+    def text_box(self):
        box = QTextEdit()
        box.setReadOnly(True)
        box.setPlaceholderText('Click Record to begin...')
+        box.setStyleSheet(
+            'QTextEdit { padding-left:10; padding-top:10; padding-bottom:10; padding-right:10; background-color: #151515; border-radius: 6; }')
        return box

    def on_next_text(self, text: str):
@@ -165,6 +194,9 @@ class Application(QApplication):
            self.stop_recording()

    def start_recording(self):
+        # Clear text box placeholder
+        self.text_box.setPlaceholderText('')
+
        # Thread needs to be attached to app object to live after end of method
        self.thread = QThread()

--- a/transcriber.py
+++ b/transcriber.py
@@ -19,13 +19,14 @@ class Transcriber:
    # after which the transcriber will stop queueing new frames
    chunk_drop_factor = 5

-    def __init__(self, model_name="tiny", text_callback: Callable[[str], None] = print) -> None:
+    def __init__(self, model_name="tiny", language=None, text_callback: Callable[[str], None] = print) -> None:
        self.pyaudio = pyaudio.PyAudio()
        self.model = whisper.load_model(model_name)
        self.stream = None
        self.frames = []
        self.text_callback = text_callback
        self.stopped = False
+        self.language = language

    def start_recording(self, frames_per_buffer=1024, sample_format=pyaudio.paInt16,
                        channels=1, rate=44100, chunk_duration=4, input_device_index=None):
@@ -63,7 +64,7 @@ class Transcriber:
                    self.write_chunk(chunk_path, channels, rate, frames)

                    result = self.model.transcribe(
-                        audio=chunk_path, language="en")
+                        audio=chunk_path, language=self.language)

                    logging.debug("Received next result: \"%s\"" %
                                  result["text"])
@@ -104,7 +105,7 @@ class Transcriber:

    def chunk_path(self):
        chunk_id = "clip-%s.wav" % (datetime.utcnow().strftime('%Y%m%d%H%M%S'))
-        return os.path.join(self.tmp_dir(), chunk_id)
+        return os.path.join(tempfile.gettempdir(), chunk_id)

    # https://stackoverflow.com/a/43418319/9830227
    def tmp_dir(self):