Add Qt application

2026-03-14 22:55:46 +01:00 · 2022-09-25 10:41:20 +01:00 · 2022-09-25 10:41:20 +01:00 · 57f3f137fe
commit 57f3f137fe
parent eb9744773e
4 changed files with 98 additions and 11 deletions
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -0,0 +1,5 @@
+{
+  "files.associations": {
+    "Buzz.spec": "python"
+  }
+}
--- a/Buzz.spec
+++ b/Buzz.spec
@@ -5,7 +5,6 @@ datas = []
 datas += collect_data_files('torch')
 datas += copy_metadata('tqdm')
 datas += copy_metadata('torch')
-datas += copy_metadata('tqdm')
 datas += copy_metadata('regex')
 datas += copy_metadata('requests')
 datas += copy_metadata('packaging')
@@ -23,8 +22,8 @@ a = Analysis(
    pathex=[],
    binaries=[],
    datas=datas,
-    hiddenimports=['googleapiclient', 'apiclient', 'pytorch', '“sklearn.utils._cython_blas”',
-                   '“sklearn.neighbors.typedefs”', '“sklearn.neighbors.quad_tree”', '“sklearn.tree”', '“sklearn.tree._utils”'],
+    hiddenimports=['apiclient', 'pytorch', '“sklearn.utils._cython_blas”', '“sklearn.neighbors.typedefs”',
+                   '“sklearn.neighbors.quad_tree”', '“sklearn.tree”', '“sklearn.tree._utils”'],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
--- a/main.py
+++ b/main.py
@@ -1,9 +1,82 @@
+import logging
 import warnings

+from PyQt5.QtCore import *
+from PyQt5.QtWidgets import *
+
 from transcriber import Transcriber

-# logging.basicConfig(level=logging.DEBUG)
+logging.basicConfig(level=logging.DEBUG)
 warnings.filterwarnings('ignore')

-transcriber = Transcriber()
-transcriber.start_recording()
+
+class TranscriberWorker(QObject):  # QObject wrapper that exposes a Transcriber through Qt signals
+    text = pyqtSignal(str)  # emitted by on_next_text() with each piece of transcribed text
+    finished = pyqtSignal()  # emitted by run() once start_recording() has returned
+
+    def __init__(self, *args) -> None:  # *args are forwarded unchanged to QObject.__init__
+        super().__init__(*args)
+        self.transcriber = Transcriber(text_callback=self.on_next_text)  # results arrive via on_next_text
+
+    def run(self):  # blocks in start_recording() until it returns, then signals completion
+        self.transcriber.start_recording()
+        self.finished.emit()
+
+    def on_next_text(self, text: str):  # Transcriber callback: re-emits the text as a Qt signal
+        self.text.emit(text)
+
+    def stop_recording(self):  # forwards the stop request to the wrapped Transcriber
+        self.transcriber.stop_recording()
+
+
+class Application:
+    """Main window with Record/Stop buttons and a read-only transcript box."""
+    def __init__(self) -> None:
+        self.app = QApplication([])
+        self.window = QWidget()
+        self.thread = self.transcriber_worker = None  # None while no recording is active
+
+        record_button = QPushButton("Record", clicked=self.on_click_record)
+        stop_button = QPushButton("Stop", clicked=self.on_click_stop)
+        self.text_box = QTextEdit(readOnly=True)
+        layout = QVBoxLayout()
+        for widget in (record_button, stop_button, self.text_box):
+            layout.addWidget(widget)
+        self.window.setLayout(layout)
+
+    def on_next_text(self, text: str):
+        """Append one piece of transcribed text to the transcript box."""
+        self.text_box.append(text)
+
+    def on_click_record(self):
+        """Start a worker on a new QThread unless a recording is already active."""
+        if self.thread is not None:
+            return  # rebinding self.thread here would drop a QThread that is still running
+        self.thread = QThread()
+        self.transcriber_worker = TranscriberWorker()
+        self.transcriber_worker.moveToThread(self.thread)
+        self.thread.started.connect(self.transcriber_worker.run)
+        self.transcriber_worker.finished.connect(self.thread.quit)
+        self.transcriber_worker.finished.connect(self.transcriber_worker.deleteLater)
+        self.thread.finished.connect(self.thread.deleteLater)
+        self.thread.finished.connect(self.on_thread_finished)
+        self.transcriber_worker.text.connect(self.on_next_text)
+        self.thread.start()
+
+    def on_thread_finished(self):
+        """Forget the finished thread/worker pair so Record can start a new one."""
+        self.thread = self.transcriber_worker = None
+
+    def on_click_stop(self):
+        """Stop the active recording; do nothing if none has been started."""
+        if self.transcriber_worker is not None:
+            self.transcriber_worker.stop_recording()
+
+    def start(self):
+        """Show the window and enter the Qt event loop."""
+        self.window.show()
+        self.app.exec()
+
+
+app = Application()  # builds the QApplication, the window and its widgets
+app.start()  # shows the window and enters the Qt event loop
--- a/transcriber.py
+++ b/transcriber.py
@@ -2,17 +2,20 @@ import logging
 import os
 import wave
 from datetime import datetime
+from typing import Callable

 import pyaudio
 import whisper


 class Transcriber:
-    def __init__(self, model_name="tiny") -> None:
+    def __init__(self, model_name="tiny", text_callback: Callable[[str], None] = print) -> None:
        self.pyaudio = pyaudio.PyAudio()
        self.model = whisper.load_model(model_name)
        self.stream = None
        self.frames = []
+        self.text_callback = text_callback  # called with the text of every transcribed chunk
+        self.stopped = False  # set by stop_recording(); checked at the top of the recording loop

    def start_recording(self, frames_per_buffer=1024, sample_format=pyaudio.paInt16, channels=1, rate=44100, chunk_duration=4):
        logging.debug("Recording...")
@@ -26,9 +29,14 @@ class Transcriber:
        self.stream.start_stream()

        frames_per_chunk = int(rate / frames_per_buffer * chunk_duration)
-
        while True:
+            if self.stopped:
+                self.frames = []
+                logging.debug("Recording stopped. Exiting...")
+                return
            if len(self.frames) > frames_per_chunk:
+                logging.debug("Buffer size: %d. Transcribing next %d frames..." %
+                              (len(self.frames), frames_per_chunk))
                chunk_path = self.chunk_path()
                try:
                    clip = []
@@ -36,18 +44,19 @@ class Transcriber:
                        clip.append(self.frames[i])
                    frames = b''.join(clip)

+                    # TODO: Can we pass the chunk to whisper in-memory?
                    self.write_chunk(chunk_path, channels, rate, frames)

                    result = self.model.transcribe(
                        audio=chunk_path, language="en")

-                    # TODO: this should probably be a callback or output buffer
-                    print(result["text"])
+                    logging.debug("Received next result: \"%s\"" %
+                                  result["text"])
+                    self.text_callback(result["text"])

                    os.remove(chunk_path)

                    self.frames = self.frames[frames_per_chunk:]
-                    logging.debug("Buffer size: ", len(self.frames))
                except KeyboardInterrupt as e:
                    self.stop_recording()
                    os.remove(chunk_path)
@@ -59,6 +68,7 @@ class Transcriber:

    def stop_recording(self):
        logging.debug("Ending recording...")
+        self.stopped = True
        self.stream.stop_stream()
        self.stream.close()
        self.pyaudio.terminate()