diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..0a221e80 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "Buzz.spec": "python" + } +} \ No newline at end of file diff --git a/Buzz.spec b/Buzz.spec index 4a9339ff..7aa8dadf 100644 --- a/Buzz.spec +++ b/Buzz.spec @@ -5,7 +5,6 @@ datas = [] datas += collect_data_files('torch') datas += copy_metadata('tqdm') datas += copy_metadata('torch') -datas += copy_metadata('tqdm') datas += copy_metadata('regex') datas += copy_metadata('requests') datas += copy_metadata('packaging') @@ -23,8 +22,8 @@ a = Analysis( pathex=[], binaries=[], datas=datas, - hiddenimports=['googleapiclient', 'apiclient', 'pytorch', '“sklearn.utils._cython_blas”', - '“sklearn.neighbors.typedefs”', '“sklearn.neighbors.quad_tree”', '“sklearn.tree”', '“sklearn.tree._utils”'], + hiddenimports=['apiclient', 'pytorch', 'sklearn.utils._cython_blas', 'sklearn.neighbors.typedefs', + 'sklearn.neighbors.quad_tree', 'sklearn.tree', 'sklearn.tree._utils'], hookspath=[], hooksconfig={}, runtime_hooks=[], diff --git a/main.py b/main.py index 1f00c913..2f6125f6 100644 --- a/main.py +++ b/main.py @@ -1,9 +1,82 @@ +import logging import warnings +from PyQt5.QtCore import * +from PyQt5.QtWidgets import * + from transcriber import Transcriber -# logging.basicConfig(level=logging.DEBUG) +logging.basicConfig(level=logging.DEBUG) warnings.filterwarnings('ignore') -transcriber = Transcriber() -transcriber.start_recording() + +class TranscriberWorker(QObject): + text = pyqtSignal(str) + finished = pyqtSignal() + + def __init__(self, *args) -> None: + super().__init__(*args) + self.transcriber = Transcriber(text_callback=self.on_next_text) + + def run(self): + self.transcriber.start_recording() + self.finished.emit() + + def on_next_text(self, text: str): + self.text.emit(text) + + def stop_recording(self): + self.transcriber.stop_recording() + + +class Application: + def 
__init__(self) -> None: + self.app = QApplication([]) + self.window = QWidget() + + layout = QVBoxLayout() + + record_button = QPushButton("Record") + record_button.clicked.connect(self.on_click_record) + + stop_button = QPushButton("Stop") + stop_button.clicked.connect(self.on_click_stop) + + self.text_box = QTextEdit() + self.text_box.setReadOnly(True) + + layout.addWidget(record_button) + layout.addWidget(stop_button) + layout.addWidget(self.text_box) + + self.window.setLayout(layout) + + def on_next_text(self, text: str): + self.text_box.append(text) + + def on_click_record(self): + self.thread = QThread() + + self.transcriber_worker = TranscriberWorker() + self.transcriber_worker.moveToThread(self.thread) + + self.thread.started.connect(self.transcriber_worker.run) + self.transcriber_worker.finished.connect(self.thread.quit) + self.transcriber_worker.finished.connect( + self.transcriber_worker.deleteLater) + self.thread.finished.connect(self.thread.deleteLater) + + self.transcriber_worker.text.connect(self.on_next_text) + + self.thread.start() + + def on_click_stop(self): + self.transcriber_worker.stop_recording() + + def start(self): + self.window.show() + self.app.exec() + + +app = Application() +app.start() diff --git a/transcriber.py b/transcriber.py index 0ba75168..3149ab89 100644 --- a/transcriber.py +++ b/transcriber.py @@ -2,17 +2,20 @@ import logging import os import wave from datetime import datetime +from typing import Callable import pyaudio import whisper class Transcriber: - def __init__(self, model_name="tiny") -> None: + def __init__(self, model_name="tiny", text_callback: Callable[[str], None] = print) -> None: self.pyaudio = pyaudio.PyAudio() self.model = whisper.load_model(model_name) self.stream = None self.frames = [] + self.text_callback = text_callback + self.stopped = False def start_recording(self, frames_per_buffer=1024, sample_format=pyaudio.paInt16, channels=1, rate=44100, chunk_duration=4): logging.debug("Recording...") @@ -26,9 
+29,14 @@ class Transcriber: self.stream.start_stream() frames_per_chunk = int(rate / frames_per_buffer * chunk_duration) - while True: + if self.stopped: + self.frames = [] + logging.debug("Recording stopped. Exiting...") + return if len(self.frames) > frames_per_chunk: + logging.debug("Buffer size: %d. Transcribing next %d frames..." % + (len(self.frames), frames_per_chunk)) chunk_path = self.chunk_path() try: clip = [] @@ -36,18 +44,19 @@ class Transcriber: clip.append(self.frames[i]) frames = b''.join(clip) + # TODO: Can we pass the chunk to whisper in-memory? self.write_chunk(chunk_path, channels, rate, frames) result = self.model.transcribe( audio=chunk_path, language="en") - # TODO: this should probably be a callback or output buffer - print(result["text"]) + logging.debug("Received next result: \"%s\"" % + result["text"]) + self.text_callback(result["text"]) os.remove(chunk_path) self.frames = self.frames[frames_per_chunk:] - logging.debug("Buffer size: ", len(self.frames)) except KeyboardInterrupt as e: self.stop_recording() os.remove(chunk_path) @@ -59,6 +68,7 @@ class Transcriber: def stop_recording(self): logging.debug("Ending recording...") + self.stopped = True self.stream.stop_stream() self.stream.close() self.pyaudio.terminate()