mirror of
https://github.com/chidiwilliams/buzz.git
synced 2024-06-26 11:40:09 +02:00
Add CLI (#424)
This commit is contained in:
parent
66bd9a1834
commit
32af05da30
|
@ -7,4 +7,4 @@ omit =
|
|||
directory = coverage/html
|
||||
|
||||
[report]
|
||||
fail_under = 76
|
||||
fail_under = 74
|
||||
|
|
22
README.md
22
README.md
|
@ -20,11 +20,13 @@ OpenAI's [Whisper](https://github.com/openai/whisper).
|
|||
|
||||
- Import audio and video files and export transcripts to TXT, SRT, and
|
||||
VTT ([Demo](https://www.loom.com/share/cf263b099ac3481082bb56d19b7c87fe))
|
||||
- Transcription and translation from your computer's microphones to text (Resource-intensive and may not be real-time, [Demo](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7))
|
||||
- Transcription and translation from your computer's microphones to text (Resource-intensive and may not be
|
||||
real-time, [Demo](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7))
|
||||
- Supports [Whisper](https://github.com/openai/whisper#available-models-and-languages),
|
||||
[Whisper.cpp](https://github.com/ggerganov/whisper.cpp), [Faster Whisper](https://github.com/guillaumekln/faster-whisper),
|
||||
[Whisper-compatible Hugging Face models](https://huggingface.co/models?other=whisper), and
|
||||
the [OpenAI Whisper API](https://platform.openai.com/docs/api-reference/introduction)
|
||||
- [Command-Line Interface](#command-line-interface)
|
||||
- Available on Mac, Windows, and Linux
|
||||
|
||||
## Installation
|
||||
|
@ -129,6 +131,24 @@ and [Virtual Audio Cable](https://vac.muzychenko.net/en/)).
|
|||
6. Open Buzz, select BlackHole as your microphone, and record as before to see transcriptions from the audio playing
|
||||
through BlackHole.
|
||||
|
||||
## Command-Line Interface
|
||||
|
||||
### `add`
|
||||
|
||||
Start a new transcription task
|
||||
|
||||
Examples:
|
||||
|
||||
```shell
|
||||
# Translate two MP3 files from French to English using OpenAI Whisper API
|
||||
buzz add --task translate --language fr --model-type openaiapi /Users/user/Downloads/1b3b03e4-8db5-ea2c-ace5-b71ff32e3304.mp3 /Users/user/Downloads/koaf9083k1lkpsfdi0.mp3
|
||||
|
||||
# Transcribe an MP4 using Whisper.cpp "small" model and immediately export to SRT and VTT files
|
||||
buzz add --task transcribe --model-type whispercpp --model-size small --prompt "My initial prompt" --srt --vtt /Users/user/Downloads/buzz/1b3b03e4-8db5-ea2c-ace5-b71ff32e3304.mp4
|
||||
```
|
||||
|
||||
Run `buzz add --help` to see all available options.
|
||||
|
||||
## Build
|
||||
|
||||
To build/run Buzz locally from source, first install the requirements:
|
||||
|
|
157
buzz/cli.py
Normal file
157
buzz/cli.py
Normal file
|
@ -0,0 +1,157 @@
|
|||
import enum
|
||||
import sys
|
||||
import typing
|
||||
|
||||
from PyQt6.QtCore import QCommandLineParser, QCommandLineOption
|
||||
|
||||
from buzz.gui import Application
|
||||
from buzz.model_loader import ModelType, WhisperModelSize, TranscriptionModel, get_local_model_path
|
||||
from buzz.store.keyring_store import KeyringStore
|
||||
from buzz.transcriber import Task, FileTranscriptionTask, FileTranscriptionOptions, TranscriptionOptions, LANGUAGES, \
|
||||
OutputFormat
|
||||
|
||||
|
||||
class CommandLineError(Exception):
    """Raised when the command-line arguments are invalid or incomplete.

    The exception message is user-facing: parse_command_line catches this
    and prints it to stderr together with the parser's help text.
    """
    # NOTE: the original defined `__init__(self, message: str)` that only
    # forwarded to super().__init__(message). Exception already stores and
    # renders its arguments, so the custom constructor was redundant;
    # callers still construct CommandLineError(message) exactly as before.
|
||||
|
||||
|
||||
class CommandLineModelType(enum.Enum):
    """Model types accepted by the CLI's --model-type option.

    The member values are the literal strings a user passes on the command
    line. Member *names* deliberately mirror those of ModelType so that
    `ModelType[member.name]` (see the add-command handling) maps a CLI
    choice to the application's model type.
    """
    WHISPER = 'whisper'
    WHISPER_CPP = 'whispercpp'
    HUGGING_FACE = 'huggingface'
    FASTER_WHISPER = 'fasterwhisper'
    OPEN_AI_WHISPER_API = 'openaiapi'
|
||||
|
||||
|
||||
def parse_command_line(app: Application):
    """Parse the application's command-line arguments.

    Delegates the real work to parse(); if that raises CommandLineError,
    the error message is written to stderr followed by the parser's help
    text, and the process exits with status code 1.
    """
    parser = QCommandLineParser()
    try:
        parse(app, parser)
    except CommandLineError as error:
        print(f'Error: {str(error)}\n', file=sys.stderr)
        print(parser.helpText())
        sys.exit(1)
|
||||
|
||||
|
||||
def parse(app: Application, parser: QCommandLineParser):
    """Parse CLI arguments and enqueue transcription tasks on the app.

    First does a lenient pass (parser.parse) just to extract the leading
    positional <command>. With no command, help/version options are
    registered and parser.process() handles --help/--version or errors.
    For the "add" command, the add-specific options are registered and a
    FileTranscriptionTask is queued on the app for every input file.

    Raises CommandLineError for any user-input problem (no files, bad
    option value, missing model, missing OpenAI token).
    """
    parser.addPositionalArgument('<command>', 'One of the following commands:\n- add')
    # parse() (unlike process()) does not exit on failure — we only need
    # the positional arguments here to decide which command was given.
    parser.parse(app.arguments())

    args = parser.positionalArguments()
    if len(args) == 0:
        # No command given: expose only --help/--version and let
        # process() print usage or errors, then return to start the GUI.
        parser.addHelpOption()
        parser.addVersionOption()

        parser.process(app)
        return

    command = args[0]
    if command == "add":
        # Replace the generic <command> usage text with add's own
        # positional-argument description before re-processing.
        parser.clearPositionalArguments()

        parser.addPositionalArgument('files', 'Input file paths', '[file file file...]')

        task_option = QCommandLineOption(['t', 'task'],
                                         f'The task to perform. Allowed: {join_values(Task)}. Default: {Task.TRANSCRIBE.value}.',
                                         'task',
                                         Task.TRANSCRIBE.value)
        model_type_option = QCommandLineOption(['m', 'model-type'],
                                               f'Model type. Allowed: {join_values(CommandLineModelType)}. Default: {CommandLineModelType.WHISPER.value}.',
                                               'model-type',
                                               CommandLineModelType.WHISPER.value)
        model_size_option = QCommandLineOption(['s', 'model-size'],
                                               f'Model size. Use only when --model-type is whisper, whispercpp, or fasterwhisper. Allowed: {join_values(WhisperModelSize)}. Default: {WhisperModelSize.TINY.value}.',
                                               'model-size', WhisperModelSize.TINY.value)
        hugging_face_model_id_option = QCommandLineOption(['hfid'],
                                                          f'Hugging Face model ID. Use only when --model-type is huggingface. Example: "openai/whisper-tiny"',
                                                          'id')
        language_option = QCommandLineOption(['l', 'language'],
                                             f'Language code. Allowed: {", ".join(sorted([k + " (" + LANGUAGES[k].title() + ")" for k in LANGUAGES]))}. Leave empty to detect language.',
                                             'code', '')
        initial_prompt_option = QCommandLineOption(['p', 'prompt'], f'Initial prompt', 'prompt', '')
        open_ai_access_token_option = QCommandLineOption('openai-token',
                                                         f'OpenAI access token. Use only when --model-type is {CommandLineModelType.OPEN_AI_WHISPER_API.value}. Defaults to your previously saved access token, if one exists.',
                                                         'token')
        srt_option = QCommandLineOption(['srt'], 'Output result in an SRT file.')
        vtt_option = QCommandLineOption(['vtt'], 'Output result in a VTT file.')
        txt_option = QCommandLineOption('txt', 'Output result in a TXT file.')

        parser.addOptions(
            [task_option, model_type_option, model_size_option, hugging_face_model_id_option, language_option,
             initial_prompt_option, open_ai_access_token_option, srt_option, vtt_option, txt_option])

        parser.addHelpOption()
        parser.addVersionOption()

        # Strict pass: exits with a message on unknown options, --help, etc.
        parser.process(app)

        # slice after first argument, the command
        file_paths = parser.positionalArguments()[1:]
        if len(file_paths) == 0:
            raise CommandLineError('No input files')

        task = parse_enum_option(task_option, parser, Task)

        model_type = parse_enum_option(model_type_option, parser, CommandLineModelType)
        model_size = parse_enum_option(model_size_option, parser, WhisperModelSize)

        hugging_face_model_id = parser.value(hugging_face_model_id_option)

        if hugging_face_model_id == '' and model_type == CommandLineModelType.HUGGING_FACE:
            raise CommandLineError('--hfid is required when --model-type is huggingface')

        # CommandLineModelType member names mirror ModelType's, so the CLI
        # choice maps to the app's model type by name.
        model = TranscriptionModel(model_type=ModelType[model_type.name], whisper_model_size=model_size,
                                   hugging_face_model_id=hugging_face_model_id)
        model_path = get_local_model_path(model)

        # The CLI does not download models; the model must already exist
        # locally (presumably fetched via the GUI first — see get_local_model_path).
        if model_path is None:
            raise CommandLineError('Model not found')

        language = parser.value(language_option)
        if language == '':
            # Empty string means "detect language" downstream.
            language = None
        elif LANGUAGES.get(language) is None:
            raise CommandLineError('Invalid language option')

        initial_prompt = parser.value(initial_prompt_option)

        output_formats: typing.Set[OutputFormat] = set()
        if parser.isSet(srt_option):
            output_formats.add(OutputFormat.SRT)
        if parser.isSet(vtt_option):
            output_formats.add(OutputFormat.VTT)
        if parser.isSet(txt_option):
            output_formats.add(OutputFormat.TXT)

        openai_access_token = parser.value(open_ai_access_token_option)
        # Only the OpenAI API model type needs a token; fall back to the
        # token previously saved in the keyring when none was passed.
        if model.model_type == ModelType.OPEN_AI_WHISPER_API and openai_access_token == '':
            openai_access_token = KeyringStore().get_password(key=KeyringStore.Key.OPENAI_API_KEY)

            if openai_access_token == '':
                raise CommandLineError('No OpenAI access token found')

        transcription_options = TranscriptionOptions(model=model, task=task, language=language,
                                                     initial_prompt=initial_prompt,
                                                     openai_access_token=openai_access_token)
        file_transcription_options = FileTranscriptionOptions(file_paths=file_paths, output_formats=output_formats)

        # One task per file; all tasks share the same options objects.
        for file_path in file_paths:
            transcription_task = FileTranscriptionTask(file_path=file_path, model_path=model_path,
                                                       transcription_options=transcription_options,
                                                       file_transcription_options=file_transcription_options)
            app.add_task(transcription_task)
|
||||
|
||||
|
||||
# Bound TypeVar so parse_enum_option returns the same enum type it is given.
T = typing.TypeVar("T", bound=enum.Enum)


def parse_enum_option(option: QCommandLineOption, parser: QCommandLineParser, enum_class: typing.Type[T]) -> T:
    """Return the member of *enum_class* whose value equals the option's parsed value.

    Raises CommandLineError (with the option's long name) when the value
    does not match any member.
    """
    raw_value = parser.value(option)
    try:
        return enum_class(raw_value)
    except ValueError:
        # names()[-1] is the long form (e.g. 'model-type' for ['m', 'model-type']).
        raise CommandLineError(f'Invalid value for --{option.names()[-1]} option.')
|
||||
|
||||
|
||||
def join_values(enum_class: typing.Type[enum.Enum]) -> str:
    """Return the values of *enum_class*'s members as a comma-separated string."""
    return ', '.join(member.value for member in enum_class)
|
22
buzz/gui.py
22
buzz/gui.py
|
@ -3,11 +3,9 @@ import json
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from enum import auto
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
import humanize
|
||||
import sounddevice
|
||||
from PyQt6 import QtGui
|
||||
from PyQt6.QtCore import (QObject, Qt, QThread,
|
||||
|
@ -18,10 +16,9 @@ from PyQt6.QtGui import (QAction, QCloseEvent, QDesktopServices, QIcon,
|
|||
from PyQt6.QtNetwork import QNetworkAccessManager, QNetworkReply, QNetworkRequest
|
||||
from PyQt6.QtWidgets import (QApplication, QCheckBox, QComboBox, QDialog,
|
||||
QDialogButtonBox, QFileDialog, QLabel, QMainWindow, QMessageBox, QPlainTextEdit,
|
||||
QProgressDialog, QPushButton, QVBoxLayout, QHBoxLayout, QWidget, QGroupBox, QTableWidget,
|
||||
QPushButton, QVBoxLayout, QHBoxLayout, QWidget, QGroupBox, QTableWidget,
|
||||
QMenuBar, QFormLayout, QTableWidgetItem,
|
||||
QAbstractItemView, QListWidget, QListWidgetItem, QSizePolicy)
|
||||
from whisper import tokenizer
|
||||
|
||||
from buzz.cache import TasksCache
|
||||
from .__version__ import VERSION
|
||||
|
@ -41,7 +38,7 @@ from .transcriber import (SUPPORTED_OUTPUT_FORMATS, FileTranscriptionOptions, Ou
|
|||
Task,
|
||||
TranscriptionOptions,
|
||||
FileTranscriberQueueWorker, FileTranscriptionTask, RecordingTranscriber, LOADED_WHISPER_DLL,
|
||||
DEFAULT_WHISPER_TEMPERATURE)
|
||||
DEFAULT_WHISPER_TEMPERATURE, LANGUAGES)
|
||||
from .widgets.line_edit import LineEdit
|
||||
from .widgets.model_download_progress_dialog import ModelDownloadProgressDialog
|
||||
from .widgets.model_type_combo_box import ModelTypeComboBox
|
||||
|
@ -112,7 +109,7 @@ class LanguagesComboBox(QComboBox):
|
|||
super().__init__(parent)
|
||||
|
||||
whisper_languages = sorted(
|
||||
[(lang, tokenizer.LANGUAGES[lang].title()) for lang in tokenizer.LANGUAGES], key=lambda lang: lang[1])
|
||||
[(lang, LANGUAGES[lang].title()) for lang in LANGUAGES], key=lambda lang: lang[1])
|
||||
self.languages = [('', _('Detect Language'))] + whisper_languages
|
||||
|
||||
self.addItems([lang[1] for lang in self.languages])
|
||||
|
@ -259,7 +256,7 @@ class FileTranscriberWidget(QWidget):
|
|||
self.transcription_options = transcription_options
|
||||
self.word_level_timings_checkbox.setDisabled(
|
||||
self.transcription_options.model.model_type == ModelType.HUGGING_FACE or self.transcription_options.model.model_type == ModelType.OPEN_AI_WHISPER_API)
|
||||
if self.transcription_options.openai_access_token is not None:
|
||||
if self.transcription_options.openai_access_token != '':
|
||||
self.openai_access_token_changed.emit(self.transcription_options.openai_access_token)
|
||||
|
||||
def on_click_run(self):
|
||||
|
@ -950,7 +947,7 @@ class MainWindow(QMainWindow):
|
|||
for file_path in file_transcription_options.file_paths:
|
||||
task = FileTranscriptionTask(
|
||||
file_path, transcription_options, file_transcription_options, model_path)
|
||||
self.transcriber_worker.add_task(task)
|
||||
self.add_task(task)
|
||||
|
||||
def update_task_table_row(self, task: FileTranscriptionTask):
|
||||
self.table_widget.upsert_task(task)
|
||||
|
@ -1053,6 +1050,9 @@ class MainWindow(QMainWindow):
|
|||
transcription_task=task, parent=self, flags=Qt.WindowType.Window)
|
||||
transcription_viewer_widget.show()
|
||||
|
||||
def add_task(self, task: FileTranscriptionTask):
|
||||
self.transcriber_worker.add_task(task)
|
||||
|
||||
def load_tasks_from_cache(self):
|
||||
tasks = self.tasks_cache.load()
|
||||
for task in tasks:
|
||||
|
@ -1378,9 +1378,15 @@ class Application(QApplication):
|
|||
def __init__(self) -> None:
|
||||
super().__init__(sys.argv)
|
||||
|
||||
self.setApplicationName(APP_NAME)
|
||||
self.setApplicationVersion(VERSION)
|
||||
|
||||
self.window = MainWindow()
|
||||
self.window.show()
|
||||
|
||||
def add_task(self, task: FileTranscriptionTask):
|
||||
self.window.add_task(task)
|
||||
|
||||
|
||||
class AdvancedSettingsDialog(QDialog):
|
||||
transcription_options: TranscriptionOptions
|
||||
|
|
|
@ -7,7 +7,8 @@ APP_NAME = 'Buzz'
|
|||
|
||||
|
||||
class Settings:
|
||||
settings = QSettings(APP_NAME)
|
||||
def __init__(self):
|
||||
self.settings = QSettings(APP_NAME)
|
||||
|
||||
class Key(enum.Enum):
|
||||
RECORDING_TRANSCRIBER_TASK = 'recording-transcriber/task'
|
||||
|
|
|
@ -11,9 +11,9 @@ class KeyringStore:
|
|||
class Key(enum.Enum):
|
||||
OPENAI_API_KEY = 'OpenAI API key'
|
||||
|
||||
def get_password(self, username: Key) -> str:
|
||||
def get_password(self, key: Key) -> str:
|
||||
try:
|
||||
password = keyring.get_password(APP_NAME, username=username.value)
|
||||
password = keyring.get_password(APP_NAME, username=key.value)
|
||||
if password is None:
|
||||
return ''
|
||||
return password
|
||||
|
|
|
@ -5,14 +5,12 @@ import json
|
|||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import platform
|
||||
import queue
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
import threading
|
||||
from abc import ABC, abstractmethod
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing.connection import Connection
|
||||
from random import randint
|
||||
|
@ -20,16 +18,16 @@ from threading import Thread
|
|||
from typing import Any, List, Optional, Tuple, Union, Set
|
||||
|
||||
import faster_whisper
|
||||
import openai
|
||||
|
||||
import ffmpeg
|
||||
import numpy as np
|
||||
import openai
|
||||
import sounddevice
|
||||
import stable_whisper
|
||||
import tqdm
|
||||
import whisper
|
||||
from PyQt6.QtCore import QObject, QProcess, pyqtSignal, pyqtSlot, QThread
|
||||
from sounddevice import PortAudioError
|
||||
from whisper import tokenizer
|
||||
|
||||
from . import transformers_whisper
|
||||
from .conn import pipe_stderr
|
||||
|
@ -62,6 +60,9 @@ class Segment:
|
|||
text: str
|
||||
|
||||
|
||||
LANGUAGES = tokenizer.LANGUAGES
|
||||
|
||||
|
||||
@dataclass()
|
||||
class TranscriptionOptions:
|
||||
language: Optional[str] = None
|
||||
|
@ -70,7 +71,7 @@ class TranscriptionOptions:
|
|||
word_level_timings: bool = False
|
||||
temperature: Tuple[float, ...] = DEFAULT_WHISPER_TEMPERATURE
|
||||
initial_prompt: str = ''
|
||||
openai_access_token: Optional[str] = None
|
||||
openai_access_token: str = ''
|
||||
|
||||
|
||||
@dataclass()
|
||||
|
|
5
main.py
5
main.py
|
@ -6,9 +6,10 @@ import platform
|
|||
import sys
|
||||
from typing import TextIO
|
||||
|
||||
from PyQt6.QtCore import QTranslator, QLocale
|
||||
from appdirs import user_log_dir
|
||||
|
||||
from buzz.cli import parse_command_line
|
||||
|
||||
# Check for segfaults if not running in frozen mode
|
||||
if getattr(sys, 'frozen', False) is False:
|
||||
faulthandler.enable()
|
||||
|
@ -51,5 +52,5 @@ if __name__ == "__main__":
|
|||
from buzz.gui import Application
|
||||
|
||||
app = Application()
|
||||
|
||||
parse_command_line(app)
|
||||
sys.exit(app.exec())
|
||||
|
|
Loading…
Reference in a new issue