From ab912deaf8a9831bd3552b3714f14d0b22b33ca1 Mon Sep 17 00:00:00 2001 From: Chidi Williams Date: Mon, 8 Jan 2024 01:45:30 +0000 Subject: [PATCH] feat: import URL for transcription (#665) --- .gitignore | 2 +- Makefile | 8 +- buzz/cache.py | 4 +- buzz/cli.py | 2 +- buzz/file_transcriber_queue_worker.py | 25 +- buzz/paths.py | 5 - buzz/settings/shortcut.py | 1 + buzz/transcriber.py | 850 ------------------ buzz/transcriber/__init__.py | 0 buzz/transcriber/file_transcriber.py | 140 +++ .../openai_whisper_api_file_transcriber.py | 120 +++ .../recording_transcriber.py | 3 +- buzz/transcriber/transcriber.py | 259 ++++++ buzz/transcriber/whisper_cpp.py | 70 ++ .../whisper_cpp_file_transcriber.py | 96 ++ buzz/transcriber/whisper_file_transcriber.py | 236 +++++ buzz/widgets/application.py | 2 +- buzz/widgets/import_url_dialog.py | 53 ++ buzz/widgets/main_window.py | 43 +- buzz/widgets/menu_bar.py | 15 +- .../folder_watch_preferences_widget.py | 2 +- .../models/file_transcription_preferences.py | 6 +- buzz/widgets/recording_transcriber_widget.py | 6 +- .../transcriber/advanced_settings_dialog.py | 2 +- .../transcriber/file_transcriber_widget.py | 22 +- .../file_transcription_form_widget.py | 2 +- .../transcriber/languages_combo_box.py | 2 +- buzz/widgets/transcriber/tasks_combo_box.py | 2 +- .../transcription_options_group_box.py | 2 +- .../transcription_task_folder_watcher.py | 2 +- .../transcription_tasks_table_widget.py | 10 +- .../export_transcription_button.py | 4 +- .../transcription_segments_editor_widget.py | 3 +- .../transcription_viewer_widget.py | 2 +- poetry.lock | 287 +++++- pyproject.toml | 1 + tests/cache_test.py | 2 +- tests/gui_test.py | 2 +- tests/transcriber/__init__.py | 0 ...penai_whisper_api_file_transcriber_test.py | 52 ++ .../transcriber/recording_transcriber_test.py | 47 + tests/transcriber/transcriber_test.py | 43 + .../whisper_cpp_file_transcriber_test.py | 76 ++ tests/transcriber/whisper_cpp_test.py | 25 + .../whisper_file_transcriber_test.py} | 
267 ++---- tests/widgets/import_url_dialog_test.py | 22 + tests/widgets/main_window_test.py | 50 +- .../folder_watch_preferences_widget_test.py | 2 +- tests/widgets/shortcuts_editor_widget_test.py | 1 + .../transcription_task_folder_watcher_test.py | 2 +- .../transcription_tasks_table_widget_test.py | 2 +- tests/widgets/transcription_viewer_test.py | 2 +- 52 files changed, 1745 insertions(+), 1139 deletions(-) delete mode 100644 buzz/transcriber.py create mode 100644 buzz/transcriber/__init__.py create mode 100644 buzz/transcriber/file_transcriber.py create mode 100644 buzz/transcriber/openai_whisper_api_file_transcriber.py rename buzz/{ => transcriber}/recording_transcriber.py (98%) create mode 100644 buzz/transcriber/transcriber.py create mode 100644 buzz/transcriber/whisper_cpp.py create mode 100644 buzz/transcriber/whisper_cpp_file_transcriber.py create mode 100644 buzz/transcriber/whisper_file_transcriber.py create mode 100644 buzz/widgets/import_url_dialog.py create mode 100644 tests/transcriber/__init__.py create mode 100644 tests/transcriber/openai_whisper_api_file_transcriber_test.py create mode 100644 tests/transcriber/recording_transcriber_test.py create mode 100644 tests/transcriber/transcriber_test.py create mode 100644 tests/transcriber/whisper_cpp_file_transcriber_test.py create mode 100644 tests/transcriber/whisper_cpp_test.py rename tests/{transcriber_test.py => transcriber/whisper_file_transcriber_test.py} (61%) create mode 100644 tests/widgets/import_url_dialog_test.py diff --git a/.gitignore b/.gitignore index d17525ea..3b233105 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,7 @@ libwhisper.* whisper_cpp whisper_cpp.exe whisper.dll -whisper_cpp.py +buzz/whisper_cpp.py # Internationalization - compiled binaries locale/**/*.mo diff --git a/Makefile b/Makefile index 42ae15fd..97aa848d 100644 --- a/Makefile +++ b/Makefile @@ -28,16 +28,16 @@ clean: rm -f buzz/whisper_cpp.py rm -rf dist/* || true -COVERAGE_THRESHOLD := 75 +COVERAGE_THRESHOLD 
:= 76 ifeq ($(UNAME_S),Linux) - COVERAGE_THRESHOLD := 70 + COVERAGE_THRESHOLD := 71 endif test: buzz/whisper_cpp.py translation_mo - pytest -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip --cov-fail-under=${COVERAGE_THRESHOLD} + pytest -s -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip --cov-fail-under=${COVERAGE_THRESHOLD} benchmarks: buzz/whisper_cpp.py translation_mo - pytest -vv --benchmark-only --benchmark-json benchmarks.json + pytest -s -vv --benchmark-only --benchmark-json benchmarks.json dist/Buzz dist/Buzz.app: buzz/whisper_cpp.py translation_mo pyinstaller --noconfirm Buzz.spec diff --git a/buzz/cache.py b/buzz/cache.py index 6617d37f..0446a4ab 100644 --- a/buzz/cache.py +++ b/buzz/cache.py @@ -6,7 +6,7 @@ from typing import List from platformdirs import user_cache_dir -from .transcriber import FileTranscriptionTask +from buzz.transcriber.transcriber import FileTranscriptionTask class TasksCache: @@ -39,7 +39,7 @@ class TasksCache: def load_json_tasks(self) -> List[FileTranscriptionTask]: task_ids: List[int] try: - with open(self.tasks_list_file_path, "r") as file: + with open(self.tasks_list_file_path) as file: task_ids = json.load(file) except json.JSONDecodeError: logging.debug( diff --git a/buzz/cli.py b/buzz/cli.py index b4d9e8ef..eb95d544 100644 --- a/buzz/cli.py +++ b/buzz/cli.py @@ -7,7 +7,7 @@ from PyQt6.QtCore import QCommandLineParser, QCommandLineOption from buzz.widgets.application import Application from buzz.model_loader import ModelType, WhisperModelSize, TranscriptionModel from buzz.store.keyring_store import KeyringStore -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( Task, FileTranscriptionTask, FileTranscriptionOptions, diff --git a/buzz/file_transcriber_queue_worker.py b/buzz/file_transcriber_queue_worker.py index cfa76abd..c18c7f97 100644 --- a/buzz/file_transcriber_queue_worker.py +++ b/buzz/file_transcriber_queue_worker.py @@ -7,14 +7,13 @@ from typing import 
Optional, Tuple, List from PyQt6.QtCore import QObject, QThread, pyqtSignal, pyqtSlot from buzz.model_loader import ModelType -from buzz.transcriber import ( - FileTranscriptionTask, - FileTranscriber, - WhisperCppFileTranscriber, +from buzz.transcriber.file_transcriber import FileTranscriber +from buzz.transcriber.openai_whisper_api_file_transcriber import ( OpenAIWhisperAPIFileTranscriber, - WhisperFileTranscriber, - Segment, ) +from buzz.transcriber.transcriber import FileTranscriptionTask, Segment +from buzz.transcriber.whisper_cpp_file_transcriber import WhisperCppFileTranscriber +from buzz.transcriber.whisper_file_transcriber import WhisperFileTranscriber class FileTranscriberQueueWorker(QObject): @@ -81,6 +80,9 @@ class FileTranscriberQueueWorker(QObject): ) self.current_transcriber.progress.connect(self.on_task_progress) + self.current_transcriber.download_progress.connect( + self.on_task_download_progress + ) self.current_transcriber.error.connect(self.on_task_error) self.current_transcriber.completed.connect(self.on_task_completed) @@ -107,14 +109,13 @@ class FileTranscriberQueueWorker(QObject): if self.current_transcriber is not None: self.current_transcriber.stop() - @pyqtSlot(Exception) - def on_task_error(self, error: Exception): + def on_task_error(self, error: str): if ( self.current_task is not None and self.current_task.id not in self.canceled_tasks ): self.current_task.status = FileTranscriptionTask.Status.FAILED - self.current_task.error = str(error) + self.current_task.error = error self.task_updated.emit(self.current_task) @pyqtSlot(tuple) @@ -124,6 +125,12 @@ class FileTranscriberQueueWorker(QObject): self.current_task.fraction_completed = progress[0] / progress[1] self.task_updated.emit(self.current_task) + def on_task_download_progress(self, fraction_downloaded: float): + if self.current_task is not None: + self.current_task.status = FileTranscriptionTask.Status.IN_PROGRESS + self.current_task.fraction_downloaded = fraction_downloaded + 
self.task_updated.emit(self.current_task) + @pyqtSlot(list) def on_task_completed(self, segments: List[Segment]): if self.current_task is not None: diff --git a/buzz/paths.py b/buzz/paths.py index 9dc7552c..bd2c2d06 100644 --- a/buzz/paths.py +++ b/buzz/paths.py @@ -1,10 +1,5 @@ import os -from typing import List def file_path_as_title(file_path: str): return os.path.basename(file_path) - - -def file_paths_as_title(file_paths: List[str]): - return ", ".join([file_path_as_title(path) for path in file_paths]) diff --git a/buzz/settings/shortcut.py b/buzz/settings/shortcut.py index fbd5bd9a..f9939d9a 100644 --- a/buzz/settings/shortcut.py +++ b/buzz/settings/shortcut.py @@ -15,6 +15,7 @@ class Shortcut(str, enum.Enum): OPEN_RECORD_WINDOW = ("Ctrl+R", "Open Record Window") OPEN_IMPORT_WINDOW = ("Ctrl+O", "Import File") + OPEN_IMPORT_URL_WINDOW = ("Ctrl+U", "Import URL") OPEN_PREFERENCES_WINDOW = ("Ctrl+,", "Open Preferences Window") OPEN_TRANSCRIPT_EDITOR = ("Ctrl+E", "Open Transcript Viewer") diff --git a/buzz/transcriber.py b/buzz/transcriber.py deleted file mode 100644 index 71a16c65..00000000 --- a/buzz/transcriber.py +++ /dev/null @@ -1,850 +0,0 @@ -import ctypes -import datetime -import enum -import json -import logging -import math -import multiprocessing -import os -import shutil -import subprocess -import sys -import tempfile -from abc import abstractmethod -from dataclasses import dataclass, field -from multiprocessing.connection import Connection -from random import randint -from threading import Thread -from typing import Any, List, Optional, Tuple, Union, Set - -import numpy as np -from openai import OpenAI - -import tqdm -from PyQt6.QtCore import QObject, pyqtSignal, pyqtSlot -from dataclasses_json import dataclass_json, config, Exclude - -from buzz.model_loader import whisper_cpp -from . 
import transformers_whisper, whisper_audio -from .conn import pipe_stderr -from .locale import _ -from .model_loader import TranscriptionModel, ModelType - -if sys.platform != "linux": - import faster_whisper - import whisper - import stable_whisper - -DEFAULT_WHISPER_TEMPERATURE = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0) - - -class Task(enum.Enum): - TRANSLATE = "translate" - TRANSCRIBE = "transcribe" - - -@dataclass -class Segment: - start: int # start time in ms - end: int # end time in ms - text: str - - -LANGUAGES = { - "en": "english", - "zh": "chinese", - "de": "german", - "es": "spanish", - "ru": "russian", - "ko": "korean", - "fr": "french", - "ja": "japanese", - "pt": "portuguese", - "tr": "turkish", - "pl": "polish", - "ca": "catalan", - "nl": "dutch", - "ar": "arabic", - "sv": "swedish", - "it": "italian", - "id": "indonesian", - "hi": "hindi", - "fi": "finnish", - "vi": "vietnamese", - "he": "hebrew", - "uk": "ukrainian", - "el": "greek", - "ms": "malay", - "cs": "czech", - "ro": "romanian", - "da": "danish", - "hu": "hungarian", - "ta": "tamil", - "no": "norwegian", - "th": "thai", - "ur": "urdu", - "hr": "croatian", - "bg": "bulgarian", - "lt": "lithuanian", - "la": "latin", - "mi": "maori", - "ml": "malayalam", - "cy": "welsh", - "sk": "slovak", - "te": "telugu", - "fa": "persian", - "lv": "latvian", - "bn": "bengali", - "sr": "serbian", - "az": "azerbaijani", - "sl": "slovenian", - "kn": "kannada", - "et": "estonian", - "mk": "macedonian", - "br": "breton", - "eu": "basque", - "is": "icelandic", - "hy": "armenian", - "ne": "nepali", - "mn": "mongolian", - "bs": "bosnian", - "kk": "kazakh", - "sq": "albanian", - "sw": "swahili", - "gl": "galician", - "mr": "marathi", - "pa": "punjabi", - "si": "sinhala", - "km": "khmer", - "sn": "shona", - "yo": "yoruba", - "so": "somali", - "af": "afrikaans", - "oc": "occitan", - "ka": "georgian", - "be": "belarusian", - "tg": "tajik", - "sd": "sindhi", - "gu": "gujarati", - "am": "amharic", - "yi": "yiddish", - "lo": "lao", 
- "uz": "uzbek", - "fo": "faroese", - "ht": "haitian creole", - "ps": "pashto", - "tk": "turkmen", - "nn": "nynorsk", - "mt": "maltese", - "sa": "sanskrit", - "lb": "luxembourgish", - "my": "myanmar", - "bo": "tibetan", - "tl": "tagalog", - "mg": "malagasy", - "as": "assamese", - "tt": "tatar", - "haw": "hawaiian", - "ln": "lingala", - "ha": "hausa", - "ba": "bashkir", - "jw": "javanese", - "su": "sundanese", - "yue": "cantonese", -} - - -@dataclass() -class TranscriptionOptions: - language: Optional[str] = None - task: Task = Task.TRANSCRIBE - model: TranscriptionModel = field(default_factory=TranscriptionModel) - word_level_timings: bool = False - temperature: Tuple[float, ...] = DEFAULT_WHISPER_TEMPERATURE - initial_prompt: str = "" - openai_access_token: str = field( - default="", metadata=config(exclude=Exclude.ALWAYS) - ) - - -def humanize_language(language: str) -> str: - if language == "": - return _("Detect Language") - return LANGUAGES[language].title() - - -@dataclass() -class FileTranscriptionOptions: - file_paths: List[str] - output_formats: Set["OutputFormat"] = field(default_factory=set) - default_output_file_name: str = "" - - -@dataclass_json -@dataclass -class FileTranscriptionTask: - class Status(enum.Enum): - QUEUED = "queued" - IN_PROGRESS = "in_progress" - COMPLETED = "completed" - FAILED = "failed" - CANCELED = "canceled" - - class Source(enum.Enum): - FILE_IMPORT = "file_import" - FOLDER_WATCH = "folder_watch" - - file_path: str - transcription_options: TranscriptionOptions - file_transcription_options: FileTranscriptionOptions - model_path: str - id: int = field(default_factory=lambda: randint(0, 100_000_000)) - segments: List[Segment] = field(default_factory=list) - status: Optional[Status] = None - fraction_completed = 0.0 - error: Optional[str] = None - queued_at: Optional[datetime.datetime] = None - started_at: Optional[datetime.datetime] = None - completed_at: Optional[datetime.datetime] = None - output_directory: Optional[str] = None 
- source: Source = Source.FILE_IMPORT - - def status_text(self) -> str: - if self.status == FileTranscriptionTask.Status.IN_PROGRESS: - return f'{_("In Progress")} ({self.fraction_completed :.0%})' - elif self.status == FileTranscriptionTask.Status.COMPLETED: - status = _("Completed") - if self.started_at is not None and self.completed_at is not None: - status += ( - f" ({self.format_timedelta(self.completed_at - self.started_at)})" - ) - return status - elif self.status == FileTranscriptionTask.Status.FAILED: - return f'{_("Failed")} ({self.error})' - elif self.status == FileTranscriptionTask.Status.CANCELED: - return _("Canceled") - elif self.status == FileTranscriptionTask.Status.QUEUED: - return _("Queued") - return "" - - @staticmethod - def format_timedelta(delta: datetime.timedelta): - mm, ss = divmod(delta.seconds, 60) - result = f"{ss}s" - if mm == 0: - return result - hh, mm = divmod(mm, 60) - result = f"{mm}m {result}" - if hh == 0: - return result - return f"{hh}h {result}" - - -class OutputFormat(enum.Enum): - TXT = "txt" - SRT = "srt" - VTT = "vtt" - - -class FileTranscriber(QObject): - transcription_task: FileTranscriptionTask - progress = pyqtSignal(tuple) # (current, total) - completed = pyqtSignal(list) # List[Segment] - error = pyqtSignal(Exception) - - def __init__(self, task: FileTranscriptionTask, parent: Optional["QObject"] = None): - super().__init__(parent) - self.transcription_task = task - - @pyqtSlot() - def run(self): - try: - segments = self.transcribe() - except Exception as exc: - logging.error(exc) - self.error.emit(exc) - return - - self.completed.emit(segments) - - for ( - output_format - ) in self.transcription_task.file_transcription_options.output_formats: - default_path = get_output_file_path( - task=self.transcription_task, output_format=output_format - ) - - write_output( - path=default_path, segments=segments, output_format=output_format - ) - - if self.transcription_task.source == FileTranscriptionTask.Source.FOLDER_WATCH: 
- shutil.move( - self.transcription_task.file_path, - os.path.join( - self.transcription_task.output_directory, - os.path.basename(self.transcription_task.file_path), - ), - ) - - @abstractmethod - def transcribe(self) -> List[Segment]: - ... - - @abstractmethod - def stop(self): - ... - - -class Stopped(Exception): - pass - - -class WhisperCppFileTranscriber(FileTranscriber): - duration_audio_ms = sys.maxsize # max int - state: "WhisperCppFileTranscriber.State" - - class State: - running = True - - def __init__( - self, task: FileTranscriptionTask, parent: Optional["QObject"] = None - ) -> None: - super().__init__(task, parent) - - self.file_path = task.file_path - self.language = task.transcription_options.language - self.model_path = task.model_path - self.task = task.transcription_options.task - self.word_level_timings = task.transcription_options.word_level_timings - self.state = self.State() - - def transcribe(self) -> List[Segment]: - self.state.running = True - model_path = self.model_path - - logging.debug( - "Starting whisper_cpp file transcription, file path = %s, language = %s, " - "task = %s, model_path = %s, word level timings = %s", - self.file_path, - self.language, - self.task, - model_path, - self.word_level_timings, - ) - - audio = whisper_audio.load_audio(self.file_path) - self.duration_audio_ms = len(audio) * 1000 / whisper_audio.SAMPLE_RATE - - whisper_params = whisper_cpp_params( - language=self.language if self.language is not None else "", - task=self.task, - word_level_timings=self.word_level_timings, - ) - whisper_params.encoder_begin_callback_user_data = ctypes.c_void_p( - id(self.state) - ) - whisper_params.encoder_begin_callback = ( - whisper_cpp.whisper_encoder_begin_callback(self.encoder_begin_callback) - ) - whisper_params.new_segment_callback_user_data = ctypes.c_void_p(id(self.state)) - whisper_params.new_segment_callback = whisper_cpp.whisper_new_segment_callback( - self.new_segment_callback - ) - - model = 
WhisperCpp(model=model_path) - result = model.transcribe(audio=self.file_path, params=whisper_params) - - if not self.state.running: - raise Stopped - - self.state.running = False - return result["segments"] - - def new_segment_callback(self, ctx, _state, _n_new, user_data): - n_segments = whisper_cpp.whisper_full_n_segments(ctx) - t1 = whisper_cpp.whisper_full_get_segment_t1(ctx, n_segments - 1) - # t1 seems to sometimes be larger than the duration when the - # audio ends in silence. Trim to fix the displayed progress. - progress = min(t1 * 10, self.duration_audio_ms) - state: WhisperCppFileTranscriber.State = ctypes.cast( - user_data, ctypes.py_object - ).value - if state.running: - self.progress.emit((progress, self.duration_audio_ms)) - - @staticmethod - def encoder_begin_callback(_ctx, _state, user_data): - state: WhisperCppFileTranscriber.State = ctypes.cast( - user_data, ctypes.py_object - ).value - return state.running == 1 - - def stop(self): - self.state.running = False - - -class OpenAIWhisperAPIFileTranscriber(FileTranscriber): - def __init__(self, task: FileTranscriptionTask, parent: Optional["QObject"] = None): - super().__init__(task=task, parent=parent) - self.file_path = task.file_path - self.task = task.transcription_options.task - self.openai_client = OpenAI( - api_key=self.transcription_task.transcription_options.openai_access_token - ) - - def transcribe(self) -> List[Segment]: - logging.debug( - "Starting OpenAI Whisper API file transcription, file path = %s, task = %s", - self.file_path, - self.task, - ) - - mp3_file = tempfile.mktemp() + ".mp3" - - cmd = ["ffmpeg", "-i", self.file_path, mp3_file] - - try: - subprocess.run(cmd, capture_output=True, check=True) - except subprocess.CalledProcessError as exc: - logging.exception("") - raise Exception(exc.stderr.decode("utf-8")) - - # fmt: off - cmd = [ - "ffprobe", - "-v", "error", - "-show_entries", "format=duration", - "-of", "default=noprint_wrappers=1:nokey=1", - mp3_file, - ] - # fmt: on - 
duration_secs = float( - subprocess.run(cmd, capture_output=True, check=True).stdout.decode("utf-8") - ) - - total_size = os.path.getsize(mp3_file) - max_chunk_size = 25 * 1024 * 1024 - - self.progress.emit((0, 100)) - - if total_size < max_chunk_size: - return self.get_segments_for_file(mp3_file) - - # If the file is larger than 25MB, split into chunks - # and transcribe each chunk separately - num_chunks = math.ceil(total_size / max_chunk_size) - chunk_duration = duration_secs / num_chunks - - segments = [] - - for i in range(num_chunks): - chunk_start = i * chunk_duration - chunk_end = min((i + 1) * chunk_duration, duration_secs) - - chunk_file = tempfile.mktemp() + ".mp3" - - # fmt: off - cmd = [ - "ffmpeg", - "-i", mp3_file, - "-ss", str(chunk_start), - "-to", str(chunk_end), - "-c", "copy", - chunk_file, - ] - # fmt: on - subprocess.run(cmd, capture_output=True, check=True) - logging.debug('Created chunk file "%s"', chunk_file) - - segments.extend( - self.get_segments_for_file( - chunk_file, offset_ms=int(chunk_start * 1000) - ) - ) - os.remove(chunk_file) - self.progress.emit((i + 1, num_chunks)) - - return segments - - def get_segments_for_file(self, file: str, offset_ms: int = 0): - kwargs = { - "model": "whisper-1", - "file": file, - "response_format": "verbose_json", - "language": self.transcription_task.transcription_options.language, - } - transcript = ( - self.openai_client.audio.transcriptions.create(**kwargs) - if self.transcription_task.transcription_options.task == Task.TRANSLATE - else self.openai_client.audio.translations.create(**kwargs) - ) - - return [ - Segment( - int(segment["start"] * 1000 + offset_ms), - int(segment["end"] * 1000 + offset_ms), - segment["text"], - ) - for segment in transcript["segments"] - ] - - def stop(self): - pass - - -class WhisperFileTranscriber(FileTranscriber): - """WhisperFileTranscriber transcribes an audio file to text, writes the text to a file, and then opens the file - using the default program for opening 
txt files.""" - - current_process: multiprocessing.Process - running = False - read_line_thread: Optional[Thread] = None - READ_LINE_THREAD_STOP_TOKEN = "--STOP--" - - def __init__( - self, task: FileTranscriptionTask, parent: Optional["QObject"] = None - ) -> None: - super().__init__(task, parent) - self.segments = [] - self.started_process = False - self.stopped = False - - def transcribe(self) -> List[Segment]: - time_started = datetime.datetime.now() - logging.debug( - "Starting whisper file transcription, task = %s", self.transcription_task - ) - - recv_pipe, send_pipe = multiprocessing.Pipe(duplex=False) - - self.current_process = multiprocessing.Process( - target=self.transcribe_whisper, args=(send_pipe, self.transcription_task) - ) - if not self.stopped: - self.current_process.start() - self.started_process = True - - self.read_line_thread = Thread(target=self.read_line, args=(recv_pipe,)) - self.read_line_thread.start() - - self.current_process.join() - - if self.current_process.exitcode != 0: - send_pipe.close() - - self.read_line_thread.join() - - logging.debug( - "whisper process completed with code = %s, time taken = %s, number of segments = %s", - self.current_process.exitcode, - datetime.datetime.now() - time_started, - len(self.segments), - ) - - if self.current_process.exitcode != 0: - raise Exception("Unknown error") - - return self.segments - - @classmethod - def transcribe_whisper( - cls, stderr_conn: Connection, task: FileTranscriptionTask - ) -> None: - with pipe_stderr(stderr_conn): - if task.transcription_options.model.model_type == ModelType.HUGGING_FACE: - segments = cls.transcribe_hugging_face(task) - elif ( - task.transcription_options.model.model_type == ModelType.FASTER_WHISPER - ): - segments = cls.transcribe_faster_whisper(task) - elif task.transcription_options.model.model_type == ModelType.WHISPER: - segments = cls.transcribe_openai_whisper(task) - else: - raise Exception( - f"Invalid model type: 
{task.transcription_options.model.model_type}" - ) - - segments_json = json.dumps(segments, ensure_ascii=True, default=vars) - sys.stderr.write(f"segments = {segments_json}\n") - sys.stderr.write(WhisperFileTranscriber.READ_LINE_THREAD_STOP_TOKEN + "\n") - - @classmethod - def transcribe_hugging_face(cls, task: FileTranscriptionTask) -> List[Segment]: - model = transformers_whisper.load_model(task.model_path) - language = ( - task.transcription_options.language - if task.transcription_options.language is not None - else "en" - ) - result = model.transcribe( - audio=task.file_path, - language=language, - task=task.transcription_options.task.value, - verbose=False, - ) - return [ - Segment( - start=int(segment.get("start") * 1000), - end=int(segment.get("end") * 1000), - text=segment.get("text"), - ) - for segment in result.get("segments") - ] - - @classmethod - def transcribe_faster_whisper(cls, task: FileTranscriptionTask) -> List[Segment]: - model = faster_whisper.WhisperModel( - model_size_or_path=task.transcription_options.model.whisper_model_size.to_faster_whisper_model_size() - ) - whisper_segments, info = model.transcribe( - audio=task.file_path, - language=task.transcription_options.language, - task=task.transcription_options.task.value, - temperature=task.transcription_options.temperature, - initial_prompt=task.transcription_options.initial_prompt, - word_timestamps=task.transcription_options.word_level_timings, - ) - segments = [] - with tqdm.tqdm(total=round(info.duration, 2), unit=" seconds") as pbar: - for segment in list(whisper_segments): - # Segment will contain words if word-level timings is True - if segment.words: - for word in segment.words: - segments.append( - Segment( - start=int(word.start * 1000), - end=int(word.end * 1000), - text=word.word, - ) - ) - else: - segments.append( - Segment( - start=int(segment.start * 1000), - end=int(segment.end * 1000), - text=segment.text, - ) - ) - - pbar.update(segment.end - segment.start) - return 
segments - - @classmethod - def transcribe_openai_whisper(cls, task: FileTranscriptionTask) -> List[Segment]: - model = whisper.load_model(task.model_path) - - if task.transcription_options.word_level_timings: - stable_whisper.modify_model(model) - result = model.transcribe( - audio=task.file_path, - language=task.transcription_options.language, - task=task.transcription_options.task.value, - temperature=task.transcription_options.temperature, - initial_prompt=task.transcription_options.initial_prompt, - pbar=True, - ) - segments = stable_whisper.group_word_timestamps(result) - return [ - Segment( - start=int(segment.get("start") * 1000), - end=int(segment.get("end") * 1000), - text=segment.get("text"), - ) - for segment in segments - ] - - result = model.transcribe( - audio=task.file_path, - language=task.transcription_options.language, - task=task.transcription_options.task.value, - temperature=task.transcription_options.temperature, - initial_prompt=task.transcription_options.initial_prompt, - verbose=False, - ) - segments = result.get("segments") - return [ - Segment( - start=int(segment.get("start") * 1000), - end=int(segment.get("end") * 1000), - text=segment.get("text"), - ) - for segment in segments - ] - - def stop(self): - self.stopped = True - if self.started_process: - self.current_process.terminate() - - def read_line(self, pipe: Connection): - while True: - try: - line = pipe.recv().strip() - except EOFError: # Connection closed - break - - if line == self.READ_LINE_THREAD_STOP_TOKEN: - return - - if line.startswith("segments = "): - segments_dict = json.loads(line[11:]) - segments = [ - Segment( - start=segment.get("start"), - end=segment.get("end"), - text=segment.get("text"), - ) - for segment in segments_dict - ] - self.segments = segments - else: - try: - progress = int(line.split("|")[0].strip().strip("%")) - self.progress.emit((progress, 100)) - except ValueError: - logging.debug("whisper (stderr): %s", line) - continue - - -def 
write_output(path: str, segments: List[Segment], output_format: OutputFormat): - logging.debug( - "Writing transcription output, path = %s, output format = %s, number of segments = %s", - path, - output_format, - len(segments), - ) - - with open(path, "w", encoding="utf-8") as file: - if output_format == OutputFormat.TXT: - for i, segment in enumerate(segments): - file.write(segment.text) - file.write("\n") - - elif output_format == OutputFormat.VTT: - file.write("WEBVTT\n\n") - for segment in segments: - file.write( - f"{to_timestamp(segment.start)} --> {to_timestamp(segment.end)}\n" - ) - file.write(f"{segment.text}\n\n") - - elif output_format == OutputFormat.SRT: - for i, segment in enumerate(segments): - file.write(f"{i + 1}\n") - file.write( - f'{to_timestamp(segment.start, ms_separator=",")} --> {to_timestamp(segment.end, ms_separator=",")}\n' - ) - file.write(f"{segment.text}\n\n") - - logging.debug("Written transcription output") - - -def segments_to_text(segments: List[Segment]) -> str: - result = "" - for i, segment in enumerate(segments): - result += f"{to_timestamp(segment.start)} --> {to_timestamp(segment.end)}\n" - result += f"{segment.text}" - if i < len(segments) - 1: - result += "\n\n" - return result - - -def to_timestamp(ms: float, ms_separator=".") -> str: - hr = int(ms / (1000 * 60 * 60)) - ms -= hr * (1000 * 60 * 60) - min = int(ms / (1000 * 60)) - ms -= min * (1000 * 60) - sec = int(ms / 1000) - ms = int(ms - sec * 1000) - return f"{hr:02d}:{min:02d}:{sec:02d}{ms_separator}{ms:03d}" - - -SUPPORTED_AUDIO_FORMATS = "Audio files (*.mp3 *.wav *.m4a *.ogg);;\ -Video files (*.mp4 *.webm *.ogm *.mov);;All files (*.*)" - - -def get_output_file_path(task: FileTranscriptionTask, output_format: OutputFormat): - input_file_name = os.path.splitext(os.path.basename(task.file_path))[0] - date_time_now = datetime.datetime.now().strftime("%d-%b-%Y %H-%M-%S") - output_file_name = ( - task.file_transcription_options.default_output_file_name.replace( - "{{ 
input_file_name }}", input_file_name - ) - .replace("{{ task }}", task.transcription_options.task.value) - .replace("{{ language }}", task.transcription_options.language or "") - .replace("{{ model_type }}", task.transcription_options.model.model_type.value) - .replace( - "{{ model_size }}", - task.transcription_options.model.whisper_model_size.value - if task.transcription_options.model.whisper_model_size is not None - else "", - ) - .replace("{{ date_time }}", date_time_now) - + f".{output_format.value}" - ) - - output_directory = task.output_directory or os.path.dirname(task.file_path) - return os.path.join(output_directory, output_file_name) - - -def whisper_cpp_params( - language: str, - task: Task, - word_level_timings: bool, - print_realtime=False, - print_progress=False, -): - params = whisper_cpp.whisper_full_default_params( - whisper_cpp.WHISPER_SAMPLING_GREEDY - ) - params.print_realtime = print_realtime - params.print_progress = print_progress - params.language = whisper_cpp.String(language.encode("utf-8")) - params.translate = task == Task.TRANSLATE - params.max_len = ctypes.c_int(1) - params.max_len = 1 if word_level_timings else 0 - params.token_timestamps = word_level_timings - return params - - -class WhisperCpp: - def __init__(self, model: str) -> None: - self.ctx = whisper_cpp.whisper_init_from_file(model.encode("utf-8")) - - def transcribe(self, audio: Union[np.ndarray, str], params: Any): - if isinstance(audio, str): - audio = whisper_audio.load_audio(audio) - - logging.debug("Loaded audio with length = %s", len(audio)) - - whisper_cpp_audio = audio.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) - result = whisper_cpp.whisper_full( - self.ctx, params, whisper_cpp_audio, len(audio) - ) - if result != 0: - raise Exception(f"Error from whisper.cpp: {result}") - - segments: List[Segment] = [] - - n_segments = whisper_cpp.whisper_full_n_segments((self.ctx)) - for i in range(n_segments): - txt = whisper_cpp.whisper_full_get_segment_text((self.ctx), 
i) - t0 = whisper_cpp.whisper_full_get_segment_t0((self.ctx), i) - t1 = whisper_cpp.whisper_full_get_segment_t1((self.ctx), i) - - segments.append( - Segment( - start=t0 * 10, # centisecond to ms - end=t1 * 10, # centisecond to ms - text=txt.decode("utf-8"), - ) - ) - - return { - "segments": segments, - "text": "".join([segment.text for segment in segments]), - } - - def __del__(self): - whisper_cpp.whisper_free(self.ctx) diff --git a/buzz/transcriber/__init__.py b/buzz/transcriber/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/buzz/transcriber/file_transcriber.py b/buzz/transcriber/file_transcriber.py new file mode 100644 index 00000000..ebd25d01 --- /dev/null +++ b/buzz/transcriber/file_transcriber.py @@ -0,0 +1,140 @@ +import logging +import os +import shutil +import tempfile +from abc import abstractmethod +from typing import Optional, List + +from PyQt6.QtCore import QObject, pyqtSignal, pyqtSlot +from yt_dlp import YoutubeDL +from yt_dlp.utils import DownloadError + +from buzz.transcriber.transcriber import ( + FileTranscriptionTask, + get_output_file_path, + Segment, + OutputFormat, +) + + +class FileTranscriber(QObject): + transcription_task: FileTranscriptionTask + progress = pyqtSignal(tuple) # (current, total) + download_progress = pyqtSignal(float) + completed = pyqtSignal(list) # List[Segment] + error = pyqtSignal(str) + + def __init__(self, task: FileTranscriptionTask, parent: Optional["QObject"] = None): + super().__init__(parent) + self.transcription_task = task + + @pyqtSlot() + def run(self): + if self.transcription_task.source == FileTranscriptionTask.Source.URL_IMPORT: + temp_output_path = tempfile.mktemp() + + ydl = YoutubeDL( + { + "format": "wav/bestaudio/best", + "progress_hooks": [self.on_download_progress], + "outtmpl": temp_output_path, + "postprocessors": [ + { + "key": "FFmpegExtractAudio", + "preferredcodec": "wav", + } + ], + } + ) + + try: + ydl.download([self.transcription_task.url]) + except DownloadError 
as exc: + self.error.emit(exc.msg) + return + + self.transcription_task.file_path = temp_output_path + ".wav" + + try: + segments = self.transcribe() + except Exception as exc: + logging.error(exc) + self.error.emit(str(exc)) + return + + self.completed.emit(segments) + + for ( + output_format + ) in self.transcription_task.file_transcription_options.output_formats: + default_path = get_output_file_path( + task=self.transcription_task, output_format=output_format + ) + + write_output( + path=default_path, segments=segments, output_format=output_format + ) + + if self.transcription_task.source == FileTranscriptionTask.Source.FOLDER_WATCH: + shutil.move( + self.transcription_task.file_path, + os.path.join( + self.transcription_task.output_directory, + os.path.basename(self.transcription_task.file_path), + ), + ) + + def on_download_progress(self, data: dict): + if data["status"] == "downloading" and data.get("total_bytes"): # total size may be missing or None; skip instead of raising inside the hook + self.download_progress.emit(data["downloaded_bytes"] / data["total_bytes"]) + + @abstractmethod + def transcribe(self) -> List[Segment]: + ... + + @abstractmethod + def stop(self): + ... 
+ + +def write_output(path: str, segments: List[Segment], output_format: OutputFormat): + logging.debug( + "Writing transcription output, path = %s, output format = %s, number of segments = %s", + path, + output_format, + len(segments), + ) + + with open(path, "w", encoding="utf-8") as file: + if output_format == OutputFormat.TXT: + for i, segment in enumerate(segments): + file.write(segment.text) + file.write("\n") + + elif output_format == OutputFormat.VTT: + file.write("WEBVTT\n\n") + for segment in segments: + file.write( + f"{to_timestamp(segment.start)} --> {to_timestamp(segment.end)}\n" + ) + file.write(f"{segment.text}\n\n") + + elif output_format == OutputFormat.SRT: + for i, segment in enumerate(segments): + file.write(f"{i + 1}\n") + file.write( + f'{to_timestamp(segment.start, ms_separator=",")} --> {to_timestamp(segment.end, ms_separator=",")}\n' + ) + file.write(f"{segment.text}\n\n") + + logging.debug("Written transcription output") + + +def to_timestamp(ms: float, ms_separator=".") -> str: + hr = int(ms / (1000 * 60 * 60)) + ms -= hr * (1000 * 60 * 60) + min = int(ms / (1000 * 60)) + ms -= min * (1000 * 60) + sec = int(ms / 1000) + ms = int(ms - sec * 1000) + return f"{hr:02d}:{min:02d}:{sec:02d}{ms_separator}{ms:03d}" diff --git a/buzz/transcriber/openai_whisper_api_file_transcriber.py b/buzz/transcriber/openai_whisper_api_file_transcriber.py new file mode 100644 index 00000000..c0e44439 --- /dev/null +++ b/buzz/transcriber/openai_whisper_api_file_transcriber.py @@ -0,0 +1,120 @@ +import logging +import math +import os +import subprocess +import tempfile +from typing import Optional, List + +from PyQt6.QtCore import QObject +from openai import OpenAI + +from buzz.transcriber.file_transcriber import FileTranscriber +from buzz.transcriber.transcriber import FileTranscriptionTask, Segment, Task + + +class OpenAIWhisperAPIFileTranscriber(FileTranscriber): + def __init__(self, task: FileTranscriptionTask, parent: Optional["QObject"] = None): + 
super().__init__(task=task, parent=parent) + self.task = task.transcription_options.task + self.openai_client = OpenAI( + api_key=self.transcription_task.transcription_options.openai_access_token + ) + + def transcribe(self) -> List[Segment]: + logging.debug( + "Starting OpenAI Whisper API file transcription, file path = %s, task = %s", + self.transcription_task.file_path, + self.task, + ) + + mp3_file = tempfile.mktemp() + ".mp3" + + cmd = ["ffmpeg", "-i", self.transcription_task.file_path, mp3_file] + + try: + subprocess.run(cmd, capture_output=True, check=True) + except subprocess.CalledProcessError as exc: + logging.exception("") + raise Exception(exc.stderr.decode("utf-8")) + + # fmt: off + cmd = [ + "ffprobe", + "-v", "error", + "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", + mp3_file, + ] + # fmt: on + duration_secs = float( + subprocess.run(cmd, capture_output=True, check=True).stdout.decode("utf-8") + ) + + total_size = os.path.getsize(mp3_file) + max_chunk_size = 25 * 1024 * 1024 + + self.progress.emit((0, 100)) + + if total_size < max_chunk_size: + return self.get_segments_for_file(mp3_file) + + # If the file is larger than 25MB, split into chunks + # and transcribe each chunk separately + num_chunks = math.ceil(total_size / max_chunk_size) + chunk_duration = duration_secs / num_chunks + + segments = [] + + for i in range(num_chunks): + chunk_start = i * chunk_duration + chunk_end = min((i + 1) * chunk_duration, duration_secs) + + chunk_file = tempfile.mktemp() + ".mp3" + + # fmt: off + cmd = [ + "ffmpeg", + "-i", mp3_file, + "-ss", str(chunk_start), + "-to", str(chunk_end), + "-c", "copy", + chunk_file, + ] + # fmt: on + subprocess.run(cmd, capture_output=True, check=True) + logging.debug('Created chunk file "%s"', chunk_file) + + segments.extend( + self.get_segments_for_file( + chunk_file, offset_ms=int(chunk_start * 1000) + ) + ) + os.remove(chunk_file) + self.progress.emit((i + 1, num_chunks)) + + return segments 
+ + def get_segments_for_file(self, file: str, offset_ms: int = 0): + kwargs = { + "model": "whisper-1", + "file": file, + "response_format": "verbose_json", + "language": self.transcription_task.transcription_options.language, + } + transcript = ( + self.openai_client.audio.transcriptions.create(**kwargs) + if self.transcription_task.transcription_options.task == Task.TRANSCRIBE + else self.openai_client.audio.translations.create(**kwargs) # translations endpoint is the one that translates to English + ) + + return [ + Segment( + int(segment["start"] * 1000 + offset_ms), + int(segment["end"] * 1000 + offset_ms), + segment["text"], + ) + for segment in transcript["segments"] + ] + + def stop(self): + pass diff --git a/buzz/recording_transcriber.py b/buzz/transcriber/recording_transcriber.py similarity index 98% rename from buzz/recording_transcriber.py rename to buzz/transcriber/recording_transcriber.py index 24bad0c8..3128666d 100644 --- a/buzz/recording_transcriber.py +++ b/buzz/transcriber/recording_transcriber.py @@ -11,7 +11,8 @@ from sounddevice import PortAudioError from buzz import transformers_whisper, whisper_audio from buzz.model_loader import ModelType -from buzz.transcriber import TranscriptionOptions, WhisperCpp, whisper_cpp_params +from buzz.transcriber.transcriber import TranscriptionOptions +from buzz.transcriber.whisper_cpp import WhisperCpp, whisper_cpp_params from buzz.transformers_whisper import TransformersWhisper if sys.platform != "linux": diff --git a/buzz/transcriber/transcriber.py b/buzz/transcriber/transcriber.py new file mode 100644 index 00000000..cbe312b5 --- /dev/null +++ b/buzz/transcriber/transcriber.py @@ -0,0 +1,259 @@ +import datetime +import enum +import os +from dataclasses import dataclass, field +from random import randint +from typing import List, Optional, Tuple, Set + +from dataclasses_json import dataclass_json, config, Exclude + +from buzz.locale import _ +from buzz.model_loader import TranscriptionModel + +DEFAULT_WHISPER_TEMPERATURE = (0.0, 0.2, 0.4, 0.6, 0.8, 1.0) + + +class 
Task(enum.Enum): + TRANSLATE = "translate" + TRANSCRIBE = "transcribe" + + +@dataclass +class Segment: + start: int # start time in ms + end: int # end time in ms + text: str + + +LANGUAGES = { + "en": "english", + "zh": "chinese", + "de": "german", + "es": "spanish", + "ru": "russian", + "ko": "korean", + "fr": "french", + "ja": "japanese", + "pt": "portuguese", + "tr": "turkish", + "pl": "polish", + "ca": "catalan", + "nl": "dutch", + "ar": "arabic", + "sv": "swedish", + "it": "italian", + "id": "indonesian", + "hi": "hindi", + "fi": "finnish", + "vi": "vietnamese", + "he": "hebrew", + "uk": "ukrainian", + "el": "greek", + "ms": "malay", + "cs": "czech", + "ro": "romanian", + "da": "danish", + "hu": "hungarian", + "ta": "tamil", + "no": "norwegian", + "th": "thai", + "ur": "urdu", + "hr": "croatian", + "bg": "bulgarian", + "lt": "lithuanian", + "la": "latin", + "mi": "maori", + "ml": "malayalam", + "cy": "welsh", + "sk": "slovak", + "te": "telugu", + "fa": "persian", + "lv": "latvian", + "bn": "bengali", + "sr": "serbian", + "az": "azerbaijani", + "sl": "slovenian", + "kn": "kannada", + "et": "estonian", + "mk": "macedonian", + "br": "breton", + "eu": "basque", + "is": "icelandic", + "hy": "armenian", + "ne": "nepali", + "mn": "mongolian", + "bs": "bosnian", + "kk": "kazakh", + "sq": "albanian", + "sw": "swahili", + "gl": "galician", + "mr": "marathi", + "pa": "punjabi", + "si": "sinhala", + "km": "khmer", + "sn": "shona", + "yo": "yoruba", + "so": "somali", + "af": "afrikaans", + "oc": "occitan", + "ka": "georgian", + "be": "belarusian", + "tg": "tajik", + "sd": "sindhi", + "gu": "gujarati", + "am": "amharic", + "yi": "yiddish", + "lo": "lao", + "uz": "uzbek", + "fo": "faroese", + "ht": "haitian creole", + "ps": "pashto", + "tk": "turkmen", + "nn": "nynorsk", + "mt": "maltese", + "sa": "sanskrit", + "lb": "luxembourgish", + "my": "myanmar", + "bo": "tibetan", + "tl": "tagalog", + "mg": "malagasy", + "as": "assamese", + "tt": "tatar", + "haw": "hawaiian", + "ln": 
"lingala", + "ha": "hausa", + "ba": "bashkir", + "jw": "javanese", + "su": "sundanese", + "yue": "cantonese", +} + + +@dataclass() +class TranscriptionOptions: + language: Optional[str] = None + task: Task = Task.TRANSCRIBE + model: TranscriptionModel = field(default_factory=TranscriptionModel) + word_level_timings: bool = False + temperature: Tuple[float, ...] = DEFAULT_WHISPER_TEMPERATURE + initial_prompt: str = "" + openai_access_token: str = field( + default="", metadata=config(exclude=Exclude.ALWAYS) + ) + + +def humanize_language(language: str) -> str: + if language == "": + return _("Detect Language") + return LANGUAGES[language].title() + + +@dataclass() +class FileTranscriptionOptions: + file_paths: Optional[List[str]] = None + url: Optional[str] = None + output_formats: Set["OutputFormat"] = field(default_factory=set) + default_output_file_name: str = "" + + +@dataclass_json +@dataclass +class FileTranscriptionTask: + class Status(enum.Enum): + QUEUED = "queued" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + CANCELED = "canceled" + + class Source(enum.Enum): + FILE_IMPORT = "file_import" + URL_IMPORT = "url_import" + FOLDER_WATCH = "folder_watch" + + transcription_options: TranscriptionOptions + file_transcription_options: FileTranscriptionOptions + model_path: str + id: int = field(default_factory=lambda: randint(0, 100_000_000)) + segments: List[Segment] = field(default_factory=list) + status: Optional[Status] = None + fraction_completed = 0.0 + error: Optional[str] = None + queued_at: Optional[datetime.datetime] = None + started_at: Optional[datetime.datetime] = None + completed_at: Optional[datetime.datetime] = None + output_directory: Optional[str] = None + source: Source = Source.FILE_IMPORT + file_path: Optional[str] = None + url: Optional[str] = None + fraction_downloaded: float = 0.0 + + def status_text(self) -> str: + match self.status: + case FileTranscriptionTask.Status.IN_PROGRESS: + if self.fraction_downloaded 
> 0 and self.fraction_completed == 0: + return f'{_("Downloading")} ({self.fraction_downloaded :.0%})' + return f'{_("In Progress")} ({self.fraction_completed :.0%})' + case FileTranscriptionTask.Status.COMPLETED: + status = _("Completed") + if self.started_at is not None and self.completed_at is not None: + status += f" ({self.format_timedelta(self.completed_at - self.started_at)})" + return status + case FileTranscriptionTask.Status.FAILED: + return f'{_("Failed")} ({self.error})' + case FileTranscriptionTask.Status.CANCELED: + return _("Canceled") + case FileTranscriptionTask.Status.QUEUED: + return _("Queued") + case _: + return "" + + @staticmethod + def format_timedelta(delta: datetime.timedelta): + mm, ss = divmod(delta.seconds, 60) + result = f"{ss}s" + if mm == 0: + return result + hh, mm = divmod(mm, 60) + result = f"{mm}m {result}" + if hh == 0: + return result + return f"{hh}h {result}" + + +class OutputFormat(enum.Enum): + TXT = "txt" + SRT = "srt" + VTT = "vtt" + + +class Stopped(Exception): + pass + + +SUPPORTED_AUDIO_FORMATS = "Audio files (*.mp3 *.wav *.m4a *.ogg);;\ +Video files (*.mp4 *.webm *.ogm *.mov);;All files (*.*)" + + +def get_output_file_path(task: FileTranscriptionTask, output_format: OutputFormat): + input_file_name = os.path.splitext(os.path.basename(task.file_path))[0] + date_time_now = datetime.datetime.now().strftime("%d-%b-%Y %H-%M-%S") + output_file_name = ( + task.file_transcription_options.default_output_file_name.replace( + "{{ input_file_name }}", input_file_name + ) + .replace("{{ task }}", task.transcription_options.task.value) + .replace("{{ language }}", task.transcription_options.language or "") + .replace("{{ model_type }}", task.transcription_options.model.model_type.value) + .replace( + "{{ model_size }}", + task.transcription_options.model.whisper_model_size.value + if task.transcription_options.model.whisper_model_size is not None + else "", + ) + .replace("{{ date_time }}", date_time_now) + + 
f".{output_format.value}" + ) + + output_directory = task.output_directory or os.path.dirname(task.file_path) + return os.path.join(output_directory, output_file_name) diff --git a/buzz/transcriber/whisper_cpp.py b/buzz/transcriber/whisper_cpp.py new file mode 100644 index 00000000..f1b1174f --- /dev/null +++ b/buzz/transcriber/whisper_cpp.py @@ -0,0 +1,70 @@ +import ctypes +import logging +from typing import Union, Any, List + +import numpy as np + +from buzz import whisper_cpp, whisper_audio +from buzz.transcriber.transcriber import Segment, Task + + +class WhisperCpp: + def __init__(self, model: str) -> None: + self.ctx = whisper_cpp.whisper_init_from_file(model.encode()) + + def transcribe(self, audio: Union[np.ndarray, str], params: Any): + if isinstance(audio, str): + audio = whisper_audio.load_audio(audio) + + logging.debug("Loaded audio with length = %s", len(audio)) + + whisper_cpp_audio = audio.ctypes.data_as(ctypes.POINTER(ctypes.c_float)) + result = whisper_cpp.whisper_full( + self.ctx, params, whisper_cpp_audio, len(audio) + ) + if result != 0: + raise Exception(f"Error from whisper.cpp: {result}") + + segments: List[Segment] = [] + + n_segments = whisper_cpp.whisper_full_n_segments((self.ctx)) + for i in range(n_segments): + txt = whisper_cpp.whisper_full_get_segment_text((self.ctx), i) + t0 = whisper_cpp.whisper_full_get_segment_t0((self.ctx), i) + t1 = whisper_cpp.whisper_full_get_segment_t1((self.ctx), i) + + segments.append( + Segment( + start=t0 * 10, # centisecond to ms + end=t1 * 10, # centisecond to ms + text=txt.decode("utf-8"), + ) + ) + + return { + "segments": segments, + "text": "".join([segment.text for segment in segments]), + } + + def __del__(self): + whisper_cpp.whisper_free(self.ctx) + + +def whisper_cpp_params( + language: str, + task: Task, + word_level_timings: bool, + print_realtime=False, + print_progress=False, +): + params = whisper_cpp.whisper_full_default_params( + whisper_cpp.WHISPER_SAMPLING_GREEDY + ) + 
params.print_realtime = print_realtime + params.print_progress = print_progress + params.language = whisper_cpp.String(language.encode()) + params.translate = task == Task.TRANSLATE + params.max_len = ctypes.c_int(1) + params.max_len = 1 if word_level_timings else 0 + params.token_timestamps = word_level_timings + return params diff --git a/buzz/transcriber/whisper_cpp_file_transcriber.py b/buzz/transcriber/whisper_cpp_file_transcriber.py new file mode 100644 index 00000000..ff816efa --- /dev/null +++ b/buzz/transcriber/whisper_cpp_file_transcriber.py @@ -0,0 +1,96 @@ +import ctypes +import logging +import sys +from typing import Optional, List + +from PyQt6.QtCore import QObject + +from buzz import whisper_cpp, whisper_audio +from buzz.transcriber.file_transcriber import FileTranscriber +from buzz.transcriber.transcriber import FileTranscriptionTask, Segment, Stopped +from buzz.transcriber.whisper_cpp import WhisperCpp, whisper_cpp_params + + +class WhisperCppFileTranscriber(FileTranscriber): + duration_audio_ms = sys.maxsize # max int + state: "WhisperCppFileTranscriber.State" + + class State: + running = True + + def __init__( + self, task: FileTranscriptionTask, parent: Optional["QObject"] = None + ) -> None: + super().__init__(task, parent) + + self.language = task.transcription_options.language + self.model_path = task.model_path + self.task = task.transcription_options.task + self.word_level_timings = task.transcription_options.word_level_timings + self.state = self.State() + + def transcribe(self) -> List[Segment]: + self.state.running = True + model_path = self.model_path + + logging.debug( + "Starting whisper_cpp file transcription, file path = %s, language = %s, " + "task = %s, model_path = %s, word level timings = %s", + self.transcription_task.file_path, + self.language, + self.task, + model_path, + self.word_level_timings, + ) + + audio = whisper_audio.load_audio(self.transcription_task.file_path) + self.duration_audio_ms = len(audio) * 1000 / 
whisper_audio.SAMPLE_RATE + + whisper_params = whisper_cpp_params( + language=self.language if self.language is not None else "", + task=self.task, + word_level_timings=self.word_level_timings, + ) + whisper_params.encoder_begin_callback_user_data = ctypes.c_void_p( + id(self.state) + ) + whisper_params.encoder_begin_callback = ( + whisper_cpp.whisper_encoder_begin_callback(self.encoder_begin_callback) + ) + whisper_params.new_segment_callback_user_data = ctypes.c_void_p(id(self.state)) + whisper_params.new_segment_callback = whisper_cpp.whisper_new_segment_callback( + self.new_segment_callback + ) + + model = WhisperCpp(model=model_path) + result = model.transcribe( + audio=audio, params=whisper_params # reuse the samples already decoded above instead of decoding the file again + ) + + if not self.state.running: + raise Stopped + + self.state.running = False + return result["segments"] + + def new_segment_callback(self, ctx, _state, _n_new, user_data): + n_segments = whisper_cpp.whisper_full_n_segments(ctx) + t1 = whisper_cpp.whisper_full_get_segment_t1(ctx, n_segments - 1) + # t1 seems to sometimes be larger than the duration when the + # audio ends in silence. Trim to fix the displayed progress. 
+ progress = min(t1 * 10, self.duration_audio_ms) + state: WhisperCppFileTranscriber.State = ctypes.cast( + user_data, ctypes.py_object + ).value + if state.running: + self.progress.emit((progress, self.duration_audio_ms)) + + @staticmethod + def encoder_begin_callback(_ctx, _state, user_data): + state: WhisperCppFileTranscriber.State = ctypes.cast( + user_data, ctypes.py_object + ).value + return state.running == 1 + + def stop(self): + self.state.running = False diff --git a/buzz/transcriber/whisper_file_transcriber.py b/buzz/transcriber/whisper_file_transcriber.py new file mode 100644 index 00000000..4a5aad73 --- /dev/null +++ b/buzz/transcriber/whisper_file_transcriber.py @@ -0,0 +1,236 @@ +import datetime +import json +import logging +import multiprocessing +import sys +from multiprocessing.connection import Connection +from threading import Thread +from typing import Optional, List + +import tqdm +from PyQt6.QtCore import QObject + +from buzz import transformers_whisper +from buzz.conn import pipe_stderr +from buzz.model_loader import ModelType +from buzz.transcriber.file_transcriber import FileTranscriber +from buzz.transcriber.transcriber import FileTranscriptionTask, Segment + +if sys.platform != "linux": + import faster_whisper + import whisper + import stable_whisper + + +class WhisperFileTranscriber(FileTranscriber): + """WhisperFileTranscriber transcribes an audio file to text, writes the text to a file, and then opens the file + using the default program for opening txt files.""" + + current_process: multiprocessing.Process + running = False + read_line_thread: Optional[Thread] = None + READ_LINE_THREAD_STOP_TOKEN = "--STOP--" + + def __init__( + self, task: FileTranscriptionTask, parent: Optional["QObject"] = None + ) -> None: + super().__init__(task, parent) + self.segments = [] + self.started_process = False + self.stopped = False + + def transcribe(self) -> List[Segment]: + time_started = datetime.datetime.now() + logging.debug( + "Starting 
whisper file transcription, task = %s", self.transcription_task + ) + + recv_pipe, send_pipe = multiprocessing.Pipe(duplex=False) + + self.current_process = multiprocessing.Process( + target=self.transcribe_whisper, args=(send_pipe, self.transcription_task) + ) + if not self.stopped: + self.current_process.start() + self.started_process = True + + self.read_line_thread = Thread(target=self.read_line, args=(recv_pipe,)) + self.read_line_thread.start() + + self.current_process.join() + + if self.current_process.exitcode != 0: + send_pipe.close() + + self.read_line_thread.join() + + logging.debug( + "whisper process completed with code = %s, time taken = %s, number of segments = %s", + self.current_process.exitcode, + datetime.datetime.now() - time_started, + len(self.segments), + ) + + if self.current_process.exitcode != 0: + raise Exception("Unknown error") + + return self.segments + + @classmethod + def transcribe_whisper( + cls, stderr_conn: Connection, task: FileTranscriptionTask + ) -> None: + with pipe_stderr(stderr_conn): + if task.transcription_options.model.model_type == ModelType.HUGGING_FACE: + segments = cls.transcribe_hugging_face(task) + elif ( + task.transcription_options.model.model_type == ModelType.FASTER_WHISPER + ): + segments = cls.transcribe_faster_whisper(task) + elif task.transcription_options.model.model_type == ModelType.WHISPER: + segments = cls.transcribe_openai_whisper(task) + else: + raise Exception( + f"Invalid model type: {task.transcription_options.model.model_type}" + ) + + segments_json = json.dumps(segments, ensure_ascii=True, default=vars) + sys.stderr.write(f"segments = {segments_json}\n") + sys.stderr.write(WhisperFileTranscriber.READ_LINE_THREAD_STOP_TOKEN + "\n") + + @classmethod + def transcribe_hugging_face(cls, task: FileTranscriptionTask) -> List[Segment]: + model = transformers_whisper.load_model(task.model_path) + language = ( + task.transcription_options.language + if task.transcription_options.language is not None + 
else "en" + ) + result = model.transcribe( + audio=task.file_path, + language=language, + task=task.transcription_options.task.value, + verbose=False, + ) + return [ + Segment( + start=int(segment.get("start") * 1000), + end=int(segment.get("end") * 1000), + text=segment.get("text"), + ) + for segment in result.get("segments") + ] + + @classmethod + def transcribe_faster_whisper(cls, task: FileTranscriptionTask) -> List[Segment]: + model = faster_whisper.WhisperModel( + model_size_or_path=task.transcription_options.model.whisper_model_size.to_faster_whisper_model_size() + ) + whisper_segments, info = model.transcribe( + audio=task.file_path, + language=task.transcription_options.language, + task=task.transcription_options.task.value, + temperature=task.transcription_options.temperature, + initial_prompt=task.transcription_options.initial_prompt, + word_timestamps=task.transcription_options.word_level_timings, + ) + segments = [] + with tqdm.tqdm(total=round(info.duration, 2), unit=" seconds") as pbar: + for segment in list(whisper_segments): + # Segment will contain words if word-level timings is True + if segment.words: + for word in segment.words: + segments.append( + Segment( + start=int(word.start * 1000), + end=int(word.end * 1000), + text=word.word, + ) + ) + else: + segments.append( + Segment( + start=int(segment.start * 1000), + end=int(segment.end * 1000), + text=segment.text, + ) + ) + + pbar.update(segment.end - segment.start) + return segments + + @classmethod + def transcribe_openai_whisper(cls, task: FileTranscriptionTask) -> List[Segment]: + model = whisper.load_model(task.model_path) + + if task.transcription_options.word_level_timings: + stable_whisper.modify_model(model) + result = model.transcribe( + audio=task.file_path, + language=task.transcription_options.language, + task=task.transcription_options.task.value, + temperature=task.transcription_options.temperature, + initial_prompt=task.transcription_options.initial_prompt, + pbar=True, + ) + 
segments = stable_whisper.group_word_timestamps(result) + return [ + Segment( + start=int(segment.get("start") * 1000), + end=int(segment.get("end") * 1000), + text=segment.get("text"), + ) + for segment in segments + ] + + result = model.transcribe( + audio=task.file_path, + language=task.transcription_options.language, + task=task.transcription_options.task.value, + temperature=task.transcription_options.temperature, + initial_prompt=task.transcription_options.initial_prompt, + verbose=False, + ) + segments = result.get("segments") + return [ + Segment( + start=int(segment.get("start") * 1000), + end=int(segment.get("end") * 1000), + text=segment.get("text"), + ) + for segment in segments + ] + + def stop(self): + self.stopped = True + if self.started_process: + self.current_process.terminate() + + def read_line(self, pipe: Connection): + while True: + try: + line = pipe.recv().strip() + except EOFError: # Connection closed + break + + if line == self.READ_LINE_THREAD_STOP_TOKEN: + return + + if line.startswith("segments = "): + segments_dict = json.loads(line[11:]) + segments = [ + Segment( + start=segment.get("start"), + end=segment.get("end"), + text=segment.get("text"), + ) + for segment in segments_dict + ] + self.segments = segments + else: + try: + progress = int(line.split("|")[0].strip().strip("%")) + self.progress.emit((progress, 100)) + except ValueError: + logging.debug("whisper (stderr): %s", line) + continue diff --git a/buzz/widgets/application.py b/buzz/widgets/application.py index 2365779a..80ba3b05 100644 --- a/buzz/widgets/application.py +++ b/buzz/widgets/application.py @@ -4,7 +4,7 @@ from PyQt6.QtWidgets import QApplication from buzz.__version__ import VERSION from buzz.settings.settings import APP_NAME -from buzz.transcriber import FileTranscriptionTask +from buzz.transcriber.transcriber import FileTranscriptionTask from buzz.widgets.main_window import MainWindow diff --git a/buzz/widgets/import_url_dialog.py 
b/buzz/widgets/import_url_dialog.py new file mode 100644 index 00000000..4f6e89ad --- /dev/null +++ b/buzz/widgets/import_url_dialog.py @@ -0,0 +1,53 @@ +from typing import Optional + +from PyQt6.QtCore import Qt, QRegularExpression +from PyQt6.QtWidgets import QDialog, QWidget, QDialogButtonBox, QVBoxLayout, QMessageBox + +from buzz.locale import _ +from buzz.widgets.line_edit import LineEdit + + +class ImportURLDialog(QDialog): + url: Optional[str] = None + url_regex = QRegularExpression( + "^((http|https)://)[-a-zA-Z0-9@:%._\\+~#?&//=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9@:%._\\+~#?&//=]*)$" + ) + + def __init__(self, parent: Optional[QWidget] = None): + super().__init__(parent=parent, flags=Qt.WindowType.Window) + + self.setWindowTitle(_("Import URL")) + + self.line_edit = LineEdit() + self.line_edit.setPlaceholderText(_("URL")) + self.line_edit.setMinimumWidth(350) + + self.button_box = QDialogButtonBox( + QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel + ) + self.button_box.accepted.connect(self.accept) + self.button_box.rejected.connect(self.reject) + + self.layout = QVBoxLayout() + self.layout.addWidget(self.line_edit) + self.layout.addWidget(self.button_box) + self.setLayout(self.layout) + + self.setMaximumSize(0, 0) + + def accept(self): + if self.url_regex.match(self.line_edit.text()).hasMatch(): + self.url = self.line_edit.text() + super().accept() + else: + QMessageBox.critical( + self, _("Invalid URL"), _("The URL you entered is invalid.") + ) + + @classmethod + def prompt(cls, parent: Optional[QWidget] = None) -> Optional[str]: + dialog = cls(parent=parent) + if dialog.exec() == QDialog.DialogCode.Accepted: + return dialog.url + else: + return None diff --git a/buzz/widgets/main_window.py b/buzz/widgets/main_window.py index a61cf950..d4d8bb83 100644 --- a/buzz/widgets/main_window.py +++ b/buzz/widgets/main_window.py @@ -1,4 +1,4 @@ -from typing import Dict, Tuple, List +from typing import Dict, Tuple, List, Optional from 
PyQt6 import QtGui from PyQt6.QtCore import ( @@ -7,7 +7,11 @@ from PyQt6.QtCore import ( QModelIndex, ) from PyQt6.QtGui import QIcon -from PyQt6.QtWidgets import QMainWindow, QMessageBox, QFileDialog +from PyQt6.QtWidgets import ( + QMainWindow, + QMessageBox, + QFileDialog, +) from buzz.cache import TasksCache from buzz.file_transcriber_queue_worker import FileTranscriberQueueWorker @@ -15,13 +19,14 @@ from buzz.locale import _ from buzz.settings.settings import APP_NAME, Settings from buzz.settings.shortcut_settings import ShortcutSettings from buzz.store.keyring_store import KeyringStore -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionTask, TranscriptionOptions, FileTranscriptionOptions, SUPPORTED_AUDIO_FORMATS, ) from buzz.widgets.icon import BUZZ_ICON_PATH +from buzz.widgets.import_url_dialog import ImportURLDialog from buzz.widgets.main_window_toolbar import MainWindowToolbar from buzz.widgets.menu_bar import MenuBar from buzz.widgets.preferences_dialog.models.preferences import Preferences @@ -87,6 +92,9 @@ class MainWindow(QMainWindow): self.menu_bar.import_action_triggered.connect( self.on_new_transcription_action_triggered ) + self.menu_bar.import_url_action_triggered.connect( + self.on_new_url_transcription_action_triggered + ) self.menu_bar.shortcuts_changed.connect(self.on_shortcuts_changed) self.menu_bar.openai_api_key_changed.connect( self.on_openai_access_token_changed @@ -161,9 +169,24 @@ class MainWindow(QMainWindow): self, options: Tuple[TranscriptionOptions, FileTranscriptionOptions, str] ): transcription_options, file_transcription_options, model_path = options - for file_path in file_transcription_options.file_paths: + + if file_transcription_options.file_paths is not None: + for file_path in file_transcription_options.file_paths: + task = FileTranscriptionTask( + transcription_options=transcription_options, + file_transcription_options=file_transcription_options, + model_path=model_path, + 
file_path=file_path, + source=FileTranscriptionTask.Source.FILE_IMPORT, + ) + self.add_task(task) + else: task = FileTranscriptionTask( - file_path, transcription_options, file_transcription_options, model_path + transcription_options=transcription_options, + file_transcription_options=file_transcription_options, + model_path=model_path, + url=file_transcription_options.url, + source=FileTranscriptionTask.Source.URL_IMPORT, ) self.add_task(task) @@ -225,9 +248,17 @@ class MainWindow(QMainWindow): self.open_file_transcriber_widget(file_paths) - def open_file_transcriber_widget(self, file_paths: List[str]): + def on_new_url_transcription_action_triggered(self): + url = ImportURLDialog.prompt(parent=self) + if url is not None: + self.open_file_transcriber_widget(url=url) + + def open_file_transcriber_widget( + self, file_paths: Optional[List[str]] = None, url: Optional[str] = None + ): file_transcriber_window = FileTranscriberWidget( file_paths=file_paths, + url=url, default_output_file_name=self.default_export_file_name, parent=self, flags=Qt.WindowType.Window, diff --git a/buzz/widgets/menu_bar.py b/buzz/widgets/menu_bar.py index 932dfb65..e7333198 100644 --- a/buzz/widgets/menu_bar.py +++ b/buzz/widgets/menu_bar.py @@ -17,6 +17,7 @@ from buzz.widgets.preferences_dialog.preferences_dialog import ( class MenuBar(QMenuBar): import_action_triggered = pyqtSignal() + import_url_action_triggered = pyqtSignal() shortcuts_changed = pyqtSignal(dict) openai_api_key_changed = pyqtSignal(str) default_export_file_name_changed = pyqtSignal(str) @@ -36,8 +37,11 @@ class MenuBar(QMenuBar): self.default_export_file_name = default_export_file_name self.preferences = preferences - self.import_action = QAction(_("Import Media File..."), self) - self.import_action.triggered.connect(self.on_import_action_triggered) + self.import_action = QAction(_("Import File..."), self) + self.import_action.triggered.connect(self.import_action_triggered) + + self.import_url_action = QAction(_("Import 
URL..."), self) + self.import_url_action.triggered.connect(self.import_url_action_triggered) about_action = QAction(f'{_("About")} {APP_NAME}', self) about_action.triggered.connect(self.on_about_action_triggered) @@ -52,15 +56,13 @@ class MenuBar(QMenuBar): file_menu = self.addMenu(_("File")) file_menu.addAction(self.import_action) + file_menu.addAction(self.import_url_action) help_menu = self.addMenu(_("Help")) help_menu.addAction(about_action) help_menu.addAction(help_action) help_menu.addAction(self.preferences_action) - def on_import_action_triggered(self): - self.import_action_triggered.emit() - def on_about_action_triggered(self): about_dialog = AboutDialog(parent=self) about_dialog.open() @@ -97,6 +99,9 @@ class MenuBar(QMenuBar): self.import_action.setShortcut( QKeySequence.fromString(shortcuts[Shortcut.OPEN_IMPORT_WINDOW.name]) ) + self.import_url_action.setShortcut( + QKeySequence.fromString(shortcuts[Shortcut.OPEN_IMPORT_URL_WINDOW.name]) + ) self.preferences_action.setShortcut( QKeySequence.fromString(shortcuts[Shortcut.OPEN_PREFERENCES_WINDOW.name]) ) diff --git a/buzz/widgets/preferences_dialog/folder_watch_preferences_widget.py b/buzz/widgets/preferences_dialog/folder_watch_preferences_widget.py index 8ade7773..3d77b286 100644 --- a/buzz/widgets/preferences_dialog/folder_watch_preferences_widget.py +++ b/buzz/widgets/preferences_dialog/folder_watch_preferences_widget.py @@ -12,7 +12,7 @@ from PyQt6.QtWidgets import ( ) from buzz.store.keyring_store import KeyringStore -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( TranscriptionOptions, FileTranscriptionOptions, ) diff --git a/buzz/widgets/preferences_dialog/models/file_transcription_preferences.py b/buzz/widgets/preferences_dialog/models/file_transcription_preferences.py index 44fb691b..9ba76ef4 100644 --- a/buzz/widgets/preferences_dialog/models/file_transcription_preferences.py +++ b/buzz/widgets/preferences_dialog/models/file_transcription_preferences.py @@ -4,7 +4,7 
@@ from typing import Optional, Tuple, Set, List from PyQt6.QtCore import QSettings from buzz.model_loader import TranscriptionModel -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( Task, OutputFormat, DEFAULT_WHISPER_TEMPERATURE, @@ -79,7 +79,8 @@ class FileTranscriptionPreferences: def to_transcription_options( self, openai_access_token: Optional[str], - file_paths: List[str], + file_paths: Optional[List[str]] = None, + url: Optional[str] = None, default_output_file_name: str = "", ) -> Tuple[TranscriptionOptions, FileTranscriptionOptions]: return ( @@ -95,6 +96,7 @@ class FileTranscriptionPreferences: FileTranscriptionOptions( output_formats=self.output_formats, file_paths=file_paths, + url=url, default_output_file_name=default_output_file_name, ), ) diff --git a/buzz/widgets/recording_transcriber_widget.py b/buzz/widgets/recording_transcriber_widget.py index 4a28fd89..9585db28 100644 --- a/buzz/widgets/recording_transcriber_widget.py +++ b/buzz/widgets/recording_transcriber_widget.py @@ -14,12 +14,12 @@ from buzz.model_loader import ( ModelType, ) from buzz.recording import RecordingAmplitudeListener -from buzz.recording_transcriber import RecordingTranscriber from buzz.settings.settings import Settings -from buzz.transcriber import ( +from buzz.transcriber.recording_transcriber import RecordingTranscriber +from buzz.transcriber.transcriber import ( TranscriptionOptions, - Task, DEFAULT_WHISPER_TEMPERATURE, + Task, ) from buzz.widgets.audio_devices_combo_box import AudioDevicesComboBox from buzz.widgets.audio_meter_widget import AudioMeterWidget diff --git a/buzz/widgets/transcriber/advanced_settings_dialog.py b/buzz/widgets/transcriber/advanced_settings_dialog.py index be7fe099..f0ce081d 100644 --- a/buzz/widgets/transcriber/advanced_settings_dialog.py +++ b/buzz/widgets/transcriber/advanced_settings_dialog.py @@ -10,7 +10,7 @@ from PyQt6.QtWidgets import ( from buzz.widgets.transcriber.temperature_validator import 
TemperatureValidator from buzz.locale import _ from buzz.model_loader import ModelType -from buzz.transcriber import TranscriptionOptions +from buzz.transcriber.transcriber import TranscriptionOptions from buzz.widgets.line_edit import LineEdit diff --git a/buzz/widgets/transcriber/file_transcriber_widget.py b/buzz/widgets/transcriber/file_transcriber_widget.py index bef88e7e..70f49135 100644 --- a/buzz/widgets/transcriber/file_transcriber_widget.py +++ b/buzz/widgets/transcriber/file_transcriber_widget.py @@ -11,10 +11,10 @@ from PyQt6.QtWidgets import ( from buzz.dialogs import show_model_download_error_dialog from buzz.locale import _ from buzz.model_loader import ModelDownloader -from buzz.paths import file_paths_as_title +from buzz.paths import file_path_as_title from buzz.settings.settings import Settings from buzz.store.keyring_store import KeyringStore -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionOptions, TranscriptionOptions, ) @@ -40,21 +40,23 @@ class FileTranscriberWidget(QWidget): def __init__( self, - file_paths: List[str], default_output_file_name: str, + file_paths: Optional[List[str]] = None, + url: Optional[str] = None, parent: Optional[QWidget] = None, flags: Qt.WindowType = Qt.WindowType.Widget, ) -> None: super().__init__(parent, flags) - self.setWindowTitle(file_paths_as_title(file_paths)) + self.url = url + self.file_paths = file_paths + + self.setWindowTitle(self.get_title()) openai_access_token = KeyringStore().get_password( KeyringStore.Key.OPENAI_API_KEY ) - self.file_paths = file_paths - preferences = self.load_preferences() ( @@ -63,6 +65,7 @@ class FileTranscriberWidget(QWidget): ) = preferences.to_transcription_options( openai_access_token=openai_access_token, file_paths=self.file_paths, + url=url, default_output_file_name=default_output_file_name, ) @@ -89,6 +92,13 @@ class FileTranscriberWidget(QWidget): self.reset_transcriber_controls() + def get_title(self) -> str: + if 
self.file_paths is not None: + return ", ".join([file_path_as_title(path) for path in self.file_paths]) + if self.url is not None: + return self.url + return "" + def load_preferences(self): self.settings.settings.beginGroup("file_transcriber") preferences = FileTranscriptionPreferences.load(settings=self.settings.settings) diff --git a/buzz/widgets/transcriber/file_transcription_form_widget.py b/buzz/widgets/transcriber/file_transcription_form_widget.py index ef0c0968..3eaf9cae 100644 --- a/buzz/widgets/transcriber/file_transcription_form_widget.py +++ b/buzz/widgets/transcriber/file_transcription_form_widget.py @@ -5,7 +5,7 @@ from PyQt6.QtWidgets import QWidget, QVBoxLayout, QCheckBox, QFormLayout, QHBoxL from buzz.locale import _ from buzz.model_loader import ModelType -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( TranscriptionOptions, FileTranscriptionOptions, OutputFormat, diff --git a/buzz/widgets/transcriber/languages_combo_box.py b/buzz/widgets/transcriber/languages_combo_box.py index 3355af63..c775c6a3 100644 --- a/buzz/widgets/transcriber/languages_combo_box.py +++ b/buzz/widgets/transcriber/languages_combo_box.py @@ -4,7 +4,7 @@ from PyQt6.QtCore import pyqtSignal from PyQt6.QtWidgets import QComboBox, QWidget from buzz.locale import _ -from buzz.transcriber import LANGUAGES +from buzz.transcriber.transcriber import LANGUAGES class LanguagesComboBox(QComboBox): diff --git a/buzz/widgets/transcriber/tasks_combo_box.py b/buzz/widgets/transcriber/tasks_combo_box.py index f889a2be..736d328f 100644 --- a/buzz/widgets/transcriber/tasks_combo_box.py +++ b/buzz/widgets/transcriber/tasks_combo_box.py @@ -3,7 +3,7 @@ from typing import Optional from PyQt6.QtCore import pyqtSignal from PyQt6.QtWidgets import QComboBox, QWidget -from buzz.transcriber import Task +from buzz.transcriber.transcriber import Task class TasksComboBox(QComboBox): diff --git a/buzz/widgets/transcriber/transcription_options_group_box.py 
b/buzz/widgets/transcriber/transcription_options_group_box.py index 8eb592e6..cd2cc435 100644 --- a/buzz/widgets/transcriber/transcription_options_group_box.py +++ b/buzz/widgets/transcriber/transcription_options_group_box.py @@ -5,7 +5,7 @@ from PyQt6.QtWidgets import QGroupBox, QWidget, QFormLayout, QComboBox from buzz.locale import _ from buzz.model_loader import ModelType, WhisperModelSize -from buzz.transcriber import TranscriptionOptions, Task +from buzz.transcriber.transcriber import TranscriptionOptions, Task from buzz.widgets.model_type_combo_box import ModelTypeComboBox from buzz.widgets.openai_api_key_line_edit import OpenAIAPIKeyLineEdit from buzz.widgets.transcriber.advanced_settings_button import AdvancedSettingsButton diff --git a/buzz/widgets/transcription_task_folder_watcher.py b/buzz/widgets/transcription_task_folder_watcher.py index e1d59d1f..668f45dd 100644 --- a/buzz/widgets/transcription_task_folder_watcher.py +++ b/buzz/widgets/transcription_task_folder_watcher.py @@ -5,7 +5,7 @@ from typing import Dict from PyQt6.QtCore import QFileSystemWatcher, pyqtSignal, QObject from buzz.store.keyring_store import KeyringStore -from buzz.transcriber import FileTranscriptionTask +from buzz.transcriber.transcriber import FileTranscriptionTask from buzz.widgets.preferences_dialog.models.folder_watch_preferences import ( FolderWatchPreferences, ) diff --git a/buzz/widgets/transcription_tasks_table_widget.py b/buzz/widgets/transcription_tasks_table_widget.py index 1b112ea9..b069a085 100644 --- a/buzz/widgets/transcription_tasks_table_widget.py +++ b/buzz/widgets/transcription_tasks_table_widget.py @@ -16,7 +16,7 @@ from PyQt6.QtWidgets import ( from buzz.locale import _ from buzz.settings.settings import Settings -from buzz.transcriber import FileTranscriptionTask, humanize_language +from buzz.transcriber.transcriber import FileTranscriptionTask, humanize_language @dataclass @@ -61,10 +61,12 @@ class TranscriptionTasksTableWidget(QTableWidget): ), 
TableColDef( id="file_name", - header=_("File Name"), + header=_("File Name/URL"), column_index=self.Column.FILE_NAME.value, - value_getter=lambda task: os.path.basename(task.file_path), - width=250, + value_getter=lambda task: task.url + if task.url is not None + else os.path.basename(task.file_path), + width=300, hidden_toggleable=False, ), TableColDef( diff --git a/buzz/widgets/transcription_viewer/export_transcription_button.py b/buzz/widgets/transcription_viewer/export_transcription_button.py index e240c75c..50b79310 100644 --- a/buzz/widgets/transcription_viewer/export_transcription_button.py +++ b/buzz/widgets/transcription_viewer/export_transcription_button.py @@ -2,11 +2,11 @@ from PyQt6.QtGui import QAction from PyQt6.QtWidgets import QPushButton, QWidget, QMenu, QFileDialog from buzz.locale import _ -from buzz.transcriber import ( +from buzz.transcriber.file_transcriber import write_output +from buzz.transcriber.transcriber import ( FileTranscriptionTask, OutputFormat, get_output_file_path, - write_output, ) from buzz.widgets.icon import FileDownloadIcon diff --git a/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py b/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py index 9bdbebe0..79acc43a 100644 --- a/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py +++ b/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py @@ -5,7 +5,8 @@ from PyQt6.QtCore import Qt, pyqtSignal from PyQt6.QtWidgets import QTableWidget, QWidget, QHeaderView, QTableWidgetItem from buzz.locale import _ -from buzz.transcriber import Segment, to_timestamp +from buzz.transcriber.file_transcriber import to_timestamp +from buzz.transcriber.transcriber import Segment class TranscriptionSegmentsEditorWidget(QTableWidget): diff --git a/buzz/widgets/transcription_viewer/transcription_viewer_widget.py b/buzz/widgets/transcription_viewer/transcription_viewer_widget.py index 16be8320..0f1c8bd3 100644 --- 
a/buzz/widgets/transcription_viewer/transcription_viewer_widget.py +++ b/buzz/widgets/transcription_viewer/transcription_viewer_widget.py @@ -13,7 +13,7 @@ from PyQt6.QtWidgets import ( from buzz.action import Action from buzz.locale import _ from buzz.paths import file_path_as_title -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionTask, Segment, ) diff --git a/poetry.lock b/poetry.lock index 60c13654..2f201efc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -139,6 +139,137 @@ files = [ { file = "av-10.0.0.tar.gz", hash = "sha256:8afd3d5610e1086f3b2d8389d66672ea78624516912c93612de64dcaa4c67e05" }, ] +[[package]] +name = "brotli" +version = "1.1.0" +description = "Python bindings for the Brotli compression library" +optional = false +python-versions = "*" +files = [ + { file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e1140c64812cb9b06c922e77f1c26a75ec5e3f0fb2bf92cc8c58720dec276752" }, + { file = "Brotli-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c8fd5270e906eef71d4a8d19b7c6a43760c6abcfcc10c9101d14eb2357418de9" }, + { file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ae56aca0402a0f9a3431cddda62ad71666ca9d4dc3a10a142b9dce2e3c0cda3" }, + { file = "Brotli-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43ce1b9935bfa1ede40028054d7f48b5469cd02733a365eec8a329ffd342915d" }, + { file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:7c4855522edb2e6ae7fdb58e07c3ba9111e7621a8956f481c68d5d979c93032e" }, + { file = "Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:38025d9f30cf4634f8309c6874ef871b841eb3c347e90b0851f63d1ded5212da" }, + { file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:e6a904cb26bfefc2f0a6f240bdf5233be78cd2488900a2f846f3c3ac8489ab80" }, + { file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a37b8f0391212d29b3a91a799c8e4a2855e0576911cdfb2515487e30e322253d" }, + { file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e84799f09591700a4154154cab9787452925578841a94321d5ee8fb9a9a328f0" }, + { file = "Brotli-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f66b5337fa213f1da0d9000bc8dc0cb5b896b726eefd9c6046f699b169c41b9e" }, + { file = "Brotli-1.1.0-cp310-cp310-win32.whl", hash = "sha256:be36e3d172dc816333f33520154d708a2657ea63762ec16b62ece02ab5e4daf2" }, + { file = "Brotli-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:0c6244521dda65ea562d5a69b9a26120769b7a9fb3db2fe9545935ed6735b128" }, + { file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a3daabb76a78f829cafc365531c972016e4aa8d5b4bf60660ad8ecee19df7ccc" }, + { file = "Brotli-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c8146669223164fc87a7e3de9f81e9423c67a79d6b3447994dfb9c95da16e2d6" }, + { file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30924eb4c57903d5a7526b08ef4a584acc22ab1ffa085faceb521521d2de32dd" }, + { file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ceb64bbc6eac5a140ca649003756940f8d6a7c444a68af170b3187623b43bebf" }, + { file = "Brotli-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a469274ad18dc0e4d316eefa616d1d0c2ff9da369af19fa6f3daa4f09671fd61" }, + { file = "Brotli-1.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:524f35912131cc2cabb00edfd8d573b07f2d9f21fa824bd3fb19725a9cf06327" }, + { file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:5b3cc074004d968722f51e550b41a27be656ec48f8afaeeb45ebf65b561481dd" }, + { file = 
"Brotli-1.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:19c116e796420b0cee3da1ccec3b764ed2952ccfcc298b55a10e5610ad7885f9" }, + { file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:510b5b1bfbe20e1a7b3baf5fed9e9451873559a976c1a78eebaa3b86c57b4265" }, + { file = "Brotli-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a1fd8a29719ccce974d523580987b7f8229aeace506952fa9ce1d53a033873c8" }, + { file = "Brotli-1.1.0-cp311-cp311-win32.whl", hash = "sha256:39da8adedf6942d76dc3e46653e52df937a3c4d6d18fdc94a7c29d263b1f5b50" }, + { file = "Brotli-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:aac0411d20e345dc0920bdec5548e438e999ff68d77564d5e9463a7ca9d3e7b1" }, + { file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409" }, + { file = "Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2" }, + { file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451" }, + { file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91" }, + { file = "Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408" }, + { file = "Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0" }, + { file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc" }, + { file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = 
"sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180" }, + { file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248" }, + { file = "Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966" }, + { file = "Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0" }, + { file = "Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951" }, + { file = "Brotli-1.1.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a090ca607cbb6a34b0391776f0cb48062081f5f60ddcce5d11838e67a01928d1" }, + { file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2de9d02f5bda03d27ede52e8cfe7b865b066fa49258cbab568720aa5be80a47d" }, + { file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2333e30a5e00fe0fe55903c8832e08ee9c3b1382aacf4db26664a16528d51b4b" }, + { file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4d4a848d1837973bf0f4b5e54e3bec977d99be36a7895c61abb659301b02c112" }, + { file = "Brotli-1.1.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:fdc3ff3bfccdc6b9cc7c342c03aa2400683f0cb891d46e94b64a197910dc4064" }, + { file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:5eeb539606f18a0b232d4ba45adccde4125592f3f636a6182b4a8a436548b914" }, + { file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:fd5f17ff8f14003595ab414e45fce13d073e0762394f957182e69035c9f3d7c2" }, + { file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:069a121ac97412d1fe506da790b3e69f52254b9df4eb665cd42460c837193354" }, + { file = "Brotli-1.1.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = 
"sha256:e93dfc1a1165e385cc8239fab7c036fb2cd8093728cbd85097b284d7b99249a2" }, + { file = "Brotli-1.1.0-cp36-cp36m-win32.whl", hash = "sha256:a599669fd7c47233438a56936988a2478685e74854088ef5293802123b5b2460" }, + { file = "Brotli-1.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:d143fd47fad1db3d7c27a1b1d66162e855b5d50a89666af46e1679c496e8e579" }, + { file = "Brotli-1.1.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:11d00ed0a83fa22d29bc6b64ef636c4552ebafcef57154b4ddd132f5638fbd1c" }, + { file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f733d788519c7e3e71f0855c96618720f5d3d60c3cb829d8bbb722dddce37985" }, + { file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:929811df5462e182b13920da56c6e0284af407d1de637d8e536c5cd00a7daf60" }, + { file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b63b949ff929fbc2d6d3ce0e924c9b93c9785d877a21a1b678877ffbbc4423a" }, + { file = "Brotli-1.1.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:d192f0f30804e55db0d0e0a35d83a9fead0e9a359a9ed0285dbacea60cc10a84" }, + { file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f296c40e23065d0d6650c4aefe7470d2a25fffda489bcc3eb66083f3ac9f6643" }, + { file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:919e32f147ae93a09fe064d77d5ebf4e35502a8df75c29fb05788528e330fe74" }, + { file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:23032ae55523cc7bccb4f6a0bf368cd25ad9bcdcc1990b64a647e7bbcce9cb5b" }, + { file = "Brotli-1.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:224e57f6eac61cc449f498cc5f0e1725ba2071a3d4f48d5d9dffba42db196438" }, + { file = "Brotli-1.1.0-cp37-cp37m-win32.whl", hash = "sha256:587ca6d3cef6e4e868102672d3bd9dc9698c309ba56d41c2b9c85bbb903cdb95" }, + { file = "Brotli-1.1.0-cp37-cp37m-win_amd64.whl", hash = 
"sha256:2954c1c23f81c2eaf0b0717d9380bd348578a94161a65b3a2afc62c86467dd68" }, + { file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:efa8b278894b14d6da122a72fefcebc28445f2d3f880ac59d46c90f4c13be9a3" }, + { file = "Brotli-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:03d20af184290887bdea3f0f78c4f737d126c74dc2f3ccadf07e54ceca3bf208" }, + { file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6172447e1b368dcbc458925e5ddaf9113477b0ed542df258d84fa28fc45ceea7" }, + { file = "Brotli-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a743e5a28af5f70f9c080380a5f908d4d21d40e8f0e0c8901604d15cfa9ba751" }, + { file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0541e747cce78e24ea12d69176f6a7ddb690e62c425e01d31cc065e69ce55b48" }, + { file = "Brotli-1.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cdbc1fc1bc0bff1cef838eafe581b55bfbffaed4ed0318b724d0b71d4d377619" }, + { file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:890b5a14ce214389b2cc36ce82f3093f96f4cc730c1cffdbefff77a7c71f2a97" }, + { file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ab4fbee0b2d9098c74f3057b2bc055a8bd92ccf02f65944a241b4349229185a" }, + { file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:141bd4d93984070e097521ed07e2575b46f817d08f9fa42b16b9b5f27b5ac088" }, + { file = "Brotli-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fce1473f3ccc4187f75b4690cfc922628aed4d3dd013d047f95a9b3919a86596" }, + { file = "Brotli-1.1.0-cp38-cp38-win32.whl", hash = "sha256:db85ecf4e609a48f4b29055f1e144231b90edc90af7481aa731ba2d059226b1b" }, + { file = "Brotli-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:3d7954194c36e304e1523f55d7042c59dc53ec20dd4e9ea9d151f1b62b4415c0" }, + { file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:5fb2ce4b8045c78ebbc7b8f3c15062e435d47e7393cc57c25115cfd49883747a" }, + { file = "Brotli-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7905193081db9bfa73b1219140b3d315831cbff0d8941f22da695832f0dd188f" }, + { file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a77def80806c421b4b0af06f45d65a136e7ac0bdca3c09d9e2ea4e515367c7e9" }, + { file = "Brotli-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dadd1314583ec0bf2d1379f7008ad627cd6336625d6679cf2f8e67081b83acf" }, + { file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:901032ff242d479a0efa956d853d16875d42157f98951c0230f69e69f9c09bac" }, + { file = "Brotli-1.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:22fc2a8549ffe699bfba2256ab2ed0421a7b8fadff114a3d201794e45a9ff578" }, + { file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ae15b066e5ad21366600ebec29a7ccbc86812ed267e4b28e860b8ca16a2bc474" }, + { file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:949f3b7c29912693cee0afcf09acd6ebc04c57af949d9bf77d6101ebb61e388c" }, + { file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:89f4988c7203739d48c6f806f1e87a1d96e0806d44f0fba61dba81392c9e474d" }, + { file = "Brotli-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:de6551e370ef19f8de1807d0a9aa2cdfdce2e85ce88b122fe9f6b2b076837e59" }, + { file = "Brotli-1.1.0-cp39-cp39-win32.whl", hash = "sha256:f0d8a7a6b5983c2496e364b969f0e526647a06b075d034f3297dc66f3b360c64" }, + { file = "Brotli-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cdad5b9014d83ca68c25d2e9444e28e967ef16e80f6b436918c700c117a85467" }, + { file = "Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724" }, +] + +[[package]] +name = "brotlicffi" +version = "1.1.0.0" +description = "Python CFFI bindings to the Brotli library" +optional = false 
+python-versions = ">=3.7" +files = [ + { file = "brotlicffi-1.1.0.0-cp37-abi3-macosx_10_9_x86_64.whl", hash = "sha256:9b7ae6bd1a3f0df532b6d67ff674099a96d22bc0948955cb338488c31bfb8851" }, + { file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19ffc919fa4fc6ace69286e0a23b3789b4219058313cf9b45625016bf7ff996b" }, + { file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9feb210d932ffe7798ee62e6145d3a757eb6233aa9a4e7db78dd3690d7755814" }, + { file = "brotlicffi-1.1.0.0-cp37-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84763dbdef5dd5c24b75597a77e1b30c66604725707565188ba54bab4f114820" }, + { file = "brotlicffi-1.1.0.0-cp37-abi3-win32.whl", hash = "sha256:1b12b50e07c3911e1efa3a8971543e7648100713d4e0971b13631cce22c587eb" }, + { file = "brotlicffi-1.1.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:994a4f0681bb6c6c3b0925530a1926b7a189d878e6e5e38fae8efa47c5d9c613" }, + { file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2e4aeb0bd2540cb91b069dbdd54d458da8c4334ceaf2d25df2f4af576d6766ca" }, + { file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b7b0033b0d37bb33009fb2fef73310e432e76f688af76c156b3594389d81391" }, + { file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:54a07bb2374a1eba8ebb52b6fafffa2afd3c4df85ddd38fcc0511f2bb387c2a8" }, + { file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7901a7dc4b88f1c1475de59ae9be59799db1007b7d059817948d8e4f12e24e35" }, + { file = "brotlicffi-1.1.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce01c7316aebc7fce59da734286148b1d1b9455f89cf2c8a4dfce7d41db55c2d" }, + { file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:246f1d1a90279bb6069de3de8d75a8856e073b8ff0b09dcca18ccc14cec85979" }, + { file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc4bc5d82bc56ebd8b514fb8350cfac4627d6b0743382e46d033976a5f80fab6" }, + { file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37c26ecb14386a44b118ce36e546ce307f4810bc9598a6e6cb4f7fca725ae7e6" }, + { file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca72968ae4eaf6470498d5c2887073f7efe3b1e7d7ec8be11a06a79cc810e990" }, + { file = "brotlicffi-1.1.0.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:add0de5b9ad9e9aa293c3aa4e9deb2b61e99ad6c1634e01d01d98c03e6a354cc" }, + { file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9b6068e0f3769992d6b622a1cd2e7835eae3cf8d9da123d7f51ca9c1e9c333e5" }, + { file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8557a8559509b61e65083f8782329188a250102372576093c88930c875a69838" }, + { file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2a7ae37e5d79c5bdfb5b4b99f2715a6035e6c5bf538c3746abc8e26694f92f33" }, + { file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391151ec86bb1c683835980f4816272a87eaddc46bb91cbf44f62228b84d8cca" }, + { file = "brotlicffi-1.1.0.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2f3711be9290f0453de8eed5275d93d286abe26b08ab4a35d7452caa1fef532f" }, + { file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1a807d760763e398bbf2c6394ae9da5815901aa93ee0a37bca5efe78d4ee3171" }, + { file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:fa8ca0623b26c94fccc3a1fdd895be1743b838f3917300506d04aa3346fd2a14" }, + { file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3de0cf28a53a3238b252aca9fed1593e9d36c1d116748013339f0949bfc84112" }, + { file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6be5ec0e88a4925c91f3dea2bb0013b3a2accda6f77238f76a34a1ea532a1cb0" }, + { file = "brotlicffi-1.1.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d9eb71bb1085d996244439154387266fd23d6ad37161f6f52f1cd41dd95a3808" }, + { file = "brotlicffi-1.1.0.0.tar.gz", hash = "sha256:b77827a689905143f87915310b93b273ab17888fd43ef350d4832c4a71083c13" }, +] + +[package.dependencies] +cffi = ">=1.0.0" + [[package]] name = "certifi" version = "2023.11.17" @@ -1111,6 +1242,17 @@ docs = ["sphinx"] gmpy = ["gmpy2 (>=2.1.0a4)"] tests = ["pytest (>=4.6)"] +[[package]] +name = "mutagen" +version = "1.47.0" +description = "read and write audio tags for many formats" +optional = false +python-versions = ">=3.7" +files = [ + { file = "mutagen-1.47.0-py3-none-any.whl", hash = "sha256:edd96f50c5907a9539d8e5bba7245f62c9f520aef333d13392a79a4f70aca719" }, + { file = "mutagen-1.47.0.tar.gz", hash = "sha256:719fadef0a978c31b4cf3c956261b3c58b6948b32023078a2117b1de09f0fc99" }, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -1424,6 +1566,47 @@ files = [ { file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206" }, ] +[[package]] +name = "pycryptodomex" +version = "3.19.1" +description = "Cryptographic library for Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + { file = "pycryptodomex-3.19.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:b5c336dc698650283ad06f8c0237a984087d0af9f403ff21d633507335628156" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-manylinux2010_i686.whl", 
hash = "sha256:c9cb88ed323be1aa642b3c17cd5caa1a03c3a8fbad092d48ecefe88e328ffae3" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:0b42e2743893f386dfb58fe24a4c8be5305c3d1c825d5f23d9e63fd0700d1110" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10c2eed4efdfa084b602ab922e699a0a2ba82053baebfc8afcaf27489def7955" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-musllinux_1_1_aarch64.whl", hash = "sha256:e94a7e986b117b72e9472f8eafdd81748dafff30815401f9760f759f1debe9ef" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-win32.whl", hash = "sha256:23707238b024b36c35dd3428f5af6c1f0c5ef54c21e387a2063633717699b8b2" }, + { file = "pycryptodomex-3.19.1-cp27-cp27m-win_amd64.whl", hash = "sha256:c1ae2fb8d5d6771670436dcc889b293e363c97647a6d31c21eebc12b7b760010" }, + { file = "pycryptodomex-3.19.1-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:d7a77391fd351ff1bdf8475558ddc6e92950218cb905419ee14aa02f370f1054" }, + { file = "pycryptodomex-3.19.1-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:c9332b04bf3f838327087b028f690f4ddb9341eb014a0221e79b9c19a77f7555" }, + { file = "pycryptodomex-3.19.1-cp27-cp27mu-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:beb5f0664f49b6093da179ee8e27c1d670779f50b9ece0886ce491bb8bd63728" }, + { file = "pycryptodomex-3.19.1-cp27-cp27mu-musllinux_1_1_aarch64.whl", hash = "sha256:d45d0d35a238d838b872598fa865bbfb31aaef9aeeda77c68b04ef79f9a469dc" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-macosx_10_9_universal2.whl", hash = "sha256:ed3bdda44cc05dd13eee697ab9bea6928531bb7b218e68e66d0d3eb2ebab043e" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ae75eea2e908383fd4c659fdcfe9621a72869e3e3ee73904227e93b7f7b80b54" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:371bbe0be17b4dd8cc0c2f378d75ea33f00d5a39884c09a672016ac40145a5fa" }, + { file 
= "pycryptodomex-3.19.1-cp35-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96000b837bcd8e3bf86b419924a056c978e45027281e4318650c81c25a3ef6cc" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:011e859026ecbd15b8e720e8992361186e582cf726c50bde6ff8c0c05e820ddf" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:76414d39df6b45bcc4f38cf1ba2031e0f4b8e99d1ba3c2eee31ffe1b9f039733" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-musllinux_1_1_i686.whl", hash = "sha256:1c04cfff163c05d033bf28e3c4429d8222796738c7b6c1638b9d7090b904611e" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:de5a43901e47e7a6938490fc5de3074f6e35c8b481a75b227c0d24d6099bd41d" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-win32.whl", hash = "sha256:f24f49fc6bd706d87048654d6be6c7c967d6836d4879e3a7c439275fab9948ad" }, + { file = "pycryptodomex-3.19.1-cp35-abi3-win_amd64.whl", hash = "sha256:f8b3d9e7c17c1ffc1fa5b11c0bbab8a5df3de8596bb32ad30281b21e5ede4bf5" }, + { file = "pycryptodomex-3.19.1-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:ac562e239d98cfef763866c0aee4586affb0d58c592202f06c87241af99db241" }, + { file = "pycryptodomex-3.19.1-pp27-pypy_73-win32.whl", hash = "sha256:39eb1f82ac3ba3e39d866f38e480e8fa53fcdd22260340f05f54a8188d47d510" }, + { file = "pycryptodomex-3.19.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bc4b7bfaac56e6dfd62044847443a3d110c7abea7fcb0d68c1aea64ed3a6697" }, + { file = "pycryptodomex-3.19.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dffe067d5fff14dba4d18ff7d459cc2a47576d82dafbff13a8f1199c3353e41" }, + { file = "pycryptodomex-3.19.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aab7941c2ff53eb63cb26252770e4f14386d79ce07baeffbf98a1323c1646545" }, + { file = 
"pycryptodomex-3.19.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3f3c58971784fba0e014bc3f8aed1197b86719631e1b597d36d7354be5598312" }, + { file = "pycryptodomex-3.19.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5ca98de2e5ac100e57a7116309723360e8f799f722509e376dc396cdf65eec9c" }, + { file = "pycryptodomex-3.19.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8a97b1acd36e9ce9d4067d94a8be99c458f0eb8070828639302a95cfcf0770b" }, + { file = "pycryptodomex-3.19.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62f51a63d73153482729904381dd2de86800b0733a8814ee8f072fa73e5c92fb" }, + { file = "pycryptodomex-3.19.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9919a1edd2a83c4dfb69f1d8a4c0c5efde7147ef15b07775633372b80c90b5d8" }, + { file = "pycryptodomex-3.19.1.tar.gz", hash = "sha256:0b7154aff2272962355f8941fd514104a88cb29db2d8f43a29af900d6398eb1c" }, +] + [[package]] name = "pydantic" version = "2.5.3" @@ -2466,6 +2649,87 @@ platformdirs = ">=3.9.1,<5" docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"] +[[package]] +name = "websockets" +version = "12.0" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.8" +files = [ + { file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374" }, + { file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = 
"sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be" }, + { file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547" }, + { file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2" }, + { file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558" }, + { file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480" }, + { file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c" }, + { file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8" }, + { file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603" }, + { file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f" }, + { file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf" }, + { file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4" }, + { file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f" }, + { file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3" }, + { 
file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c" }, + { file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45" }, + { file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04" }, + { file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447" }, + { file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca" }, + { file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53" }, + { file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402" }, + { file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b" }, + { file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df" }, + { file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc" }, + { file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b" }, + { file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb" }, + { file = 
"websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92" }, + { file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed" }, + { file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5" }, + { file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2" }, + { file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113" }, + { file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d" }, + { file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f" }, + { file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438" }, + { file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2" }, + { file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d" }, + { file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137" }, + { file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205" }, + { file = 
"websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def" }, + { file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8" }, + { file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967" }, + { file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7" }, + { file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62" }, + { file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892" }, + { file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d" }, + { file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28" }, + { file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53" }, + { file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c" }, + { file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec" }, + { file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9" }, + { file = 
"websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae" }, + { file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b" }, + { file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9" }, + { file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6" }, + { file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8" }, + { file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd" }, + { file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870" }, + { file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077" }, + { file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b" }, + { file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30" }, + { file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6" }, + { file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123" }, + { file = 
"websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931" }, + { file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2" }, + { file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468" }, + { file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b" }, + { file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399" }, + { file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7" }, + { file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611" }, + { file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370" }, + { file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e" }, + { file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b" }, +] + [[package]] name = "whisper" version = "1.1.10" @@ -2558,6 +2822,27 @@ files = [ { file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d" }, ] +[[package]] +name = "yt-dlp" +version = "2023.12.30" +description = "A 
youtube-dl fork with additional features and patches" +optional = false +python-versions = ">=3.8" +files = [ + { file = "yt-dlp-2023.12.30.tar.gz", hash = "sha256:a11862e57721b0a0f0883dfeb5a4d79ba213a2d4c45e1880e9fd70f8e6570c38" }, + { file = "yt_dlp-2023.12.30-py2.py3-none-any.whl", hash = "sha256:c00d9a71d64472ad441bcaa1ec0c3797d6e60c9f934f270096a96fe51657e7b3" }, +] + +[package.dependencies] +brotli = { version = "*", markers = "implementation_name == \"cpython\"" } +brotlicffi = { version = "*", markers = "implementation_name != \"cpython\"" } +certifi = "*" +mutagen = "*" +pycryptodomex = "*" +requests = ">=2.31.0,<3" +urllib3 = ">=1.26.17,<3" +websockets = ">=12.0" + [[package]] name = "zipp" version = "3.17.0" @@ -2576,4 +2861,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.9.13,<3.11" -content-hash = "4eed17ca12ee91499c96f410ceebac3c8075e2b0ee10c5a45327ca7a01f4e6ab" +content-hash = "4d011bc97fb3eb123631563b28e3f1e55a1a189a3dcbbb7a29f55460945c5982" diff --git a/pyproject.toml b/pyproject.toml index be98c223..19790700 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ dataclasses-json = "^0.5.9" ffmpeg-python = "0.2.0" numpy = "^1.21.2" requests = "^2.31.0" +yt-dlp = "2023.12.30" # Only install on non-Linux to prevent execstack errors stable-ts = { version = "1.0.2", markers = "sys_platform != 'linux'" } diff --git a/tests/cache_test.py b/tests/cache_test.py index f74c3b80..b94f1d88 100644 --- a/tests/cache_test.py +++ b/tests/cache_test.py @@ -1,5 +1,5 @@ from buzz.cache import TasksCache -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionOptions, FileTranscriptionTask, TranscriptionOptions, diff --git a/tests/gui_test.py b/tests/gui_test.py index 58a057c0..e81c3644 100644 --- a/tests/gui_test.py +++ b/tests/gui_test.py @@ -23,7 +23,7 @@ from buzz.widgets.transcriber.languages_combo_box import 
LanguagesComboBox from buzz.widgets.transcriber.temperature_validator import TemperatureValidator from buzz.widgets.about_dialog import AboutDialog from buzz.settings.settings import Settings -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( TranscriptionOptions, ) from buzz.widgets.transcriber.transcription_options_group_box import ( diff --git a/tests/transcriber/__init__.py b/tests/transcriber/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/transcriber/openai_whisper_api_file_transcriber_test.py b/tests/transcriber/openai_whisper_api_file_transcriber_test.py new file mode 100644 index 00000000..a24f8b9e --- /dev/null +++ b/tests/transcriber/openai_whisper_api_file_transcriber_test.py @@ -0,0 +1,52 @@ +import os +from unittest.mock import patch, Mock + +import pytest + +from buzz.transcriber.openai_whisper_api_file_transcriber import ( + OpenAIWhisperAPIFileTranscriber, +) +from buzz.transcriber.transcriber import ( + FileTranscriptionTask, + TranscriptionOptions, + FileTranscriptionOptions, +) + + +class TestOpenAIWhisperAPIFileTranscriber: + @pytest.fixture + def mock_openai_client(self): + with patch( + "buzz.transcriber.openai_whisper_api_file_transcriber.OpenAI" + ) as mock: + return_value = {"segments": [{"start": 0, "end": 6.56, "text": "Hello"}]} + mock.return_value.audio.transcriptions.create.return_value = return_value + mock.return_value.audio.translations.create.return_value = return_value + yield mock + + def test_transcribe(self, mock_openai_client): + file_path = "testdata/whisper-french.mp3" + transcriber = OpenAIWhisperAPIFileTranscriber( + task=FileTranscriptionTask( + file_path=file_path, + transcription_options=( + TranscriptionOptions( + openai_access_token=os.getenv("OPENAI_ACCESS_TOKEN"), + ) + ), + file_transcription_options=( + FileTranscriptionOptions(file_paths=[file_path]) + ), + model_path="", + ) + ) + mock_completed = Mock() + transcriber.completed.connect(mock_completed) + 
transcriber.run() + + called_segments = mock_completed.call_args[0][0] + + assert len(called_segments) == 1 + assert called_segments[0].start == 0 + assert called_segments[0].end == 6560 + assert called_segments[0].text == "Hello" diff --git a/tests/transcriber/recording_transcriber_test.py b/tests/transcriber/recording_transcriber_test.py new file mode 100644 index 00000000..bc1e66de --- /dev/null +++ b/tests/transcriber/recording_transcriber_test.py @@ -0,0 +1,47 @@ +from unittest.mock import Mock, patch + +import pytest +from PyQt6.QtCore import QThread + +from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize +from buzz.transcriber.recording_transcriber import RecordingTranscriber +from buzz.transcriber.transcriber import TranscriptionOptions, Task +from tests.mock_sounddevice import MockInputStream + + +class TestRecordingTranscriber: + @pytest.mark.skip(reason="Hanging") + def test_should_transcribe(self, qtbot): + thread = QThread() + + transcription_model = TranscriptionModel( + model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY + ) + + transcriber = RecordingTranscriber( + transcription_options=TranscriptionOptions( + model=transcription_model, language="fr", task=Task.TRANSCRIBE + ), + input_device_index=0, + sample_rate=16_000, + ) + transcriber.moveToThread(thread) + + thread.finished.connect(thread.deleteLater) + + mock_transcription = Mock() + transcriber.transcription.connect(mock_transcription) + + transcriber.finished.connect(thread.quit) + transcriber.finished.connect(transcriber.deleteLater) + + with patch("sounddevice.InputStream", side_effect=MockInputStream), patch( + "sounddevice.check_input_settings" + ), qtbot.wait_signal(transcriber.transcription, timeout=60 * 1000): + thread.start() + + with qtbot.wait_signal(thread.finished, timeout=60 * 1000): + transcriber.stop_recording() + + text = mock_transcription.call_args[0][0] + assert "Bienvenue dans Passe" in text diff --git 
a/tests/transcriber/transcriber_test.py b/tests/transcriber/transcriber_test.py new file mode 100644 index 00000000..f3df832c --- /dev/null +++ b/tests/transcriber/transcriber_test.py @@ -0,0 +1,43 @@ +import pathlib + +import pytest + +from buzz.transcriber.file_transcriber import write_output, to_timestamp +from buzz.transcriber.transcriber import ( + OutputFormat, + Segment, +) + + +class TestToTimestamp: + def test_to_timestamp(self): + assert to_timestamp(0) == "00:00:00.000" + assert to_timestamp(123456789) == "34:17:36.789" + + +@pytest.mark.parametrize( + "output_format,output_text", + [ + (OutputFormat.TXT, "Bien\nvenue dans\n"), + ( + OutputFormat.SRT, + "1\n00:00:00,040 --> 00:00:00,299\nBien\n\n2\n00:00:00,299 --> 00:00:00,329\nvenue dans\n\n", + ), + ( + OutputFormat.VTT, + "WEBVTT\n\n00:00:00.040 --> 00:00:00.299\nBien\n\n00:00:00.299 --> 00:00:00.329\nvenue dans\n\n", + ), + ], +) +def test_write_output( + tmp_path: pathlib.Path, output_format: OutputFormat, output_text: str +): + output_file_path = tmp_path / "whisper.txt" + segments = [Segment(40, 299, "Bien"), Segment(299, 329, "venue dans")] + + write_output( + path=str(output_file_path), segments=segments, output_format=output_format + ) + + with open(output_file_path, encoding="utf-8") as output_file: + assert output_text == output_file.read() diff --git a/tests/transcriber/whisper_cpp_file_transcriber_test.py b/tests/transcriber/whisper_cpp_file_transcriber_test.py new file mode 100644 index 00000000..ca077b69 --- /dev/null +++ b/tests/transcriber/whisper_cpp_file_transcriber_test.py @@ -0,0 +1,76 @@ +from typing import List +from unittest.mock import Mock + +import pytest +from pytestqt.qtbot import QtBot + +from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize +from buzz.transcriber.transcriber import ( + Segment, + FileTranscriptionOptions, + TranscriptionOptions, + Task, + FileTranscriptionTask, +) +from buzz.transcriber.whisper_cpp_file_transcriber import 
WhisperCppFileTranscriber +from tests.model_loader import get_model_path + + +class TestWhisperCppFileTranscriber: + @pytest.mark.parametrize( + "word_level_timings,expected_segments", + [ + ( + False, + [Segment(0, 6560, "Bienvenue dans Passe-Relle. Un podcast pensé pour")], + ), + (True, [Segment(30, 330, "Bien"), Segment(330, 740, "venue")]), + ], + ) + def test_transcribe( + self, qtbot: QtBot, word_level_timings: bool, expected_segments: List[Segment] + ): + file_transcription_options = FileTranscriptionOptions( + file_paths=["testdata/whisper-french.mp3"] + ) + transcription_options = TranscriptionOptions( + language="fr", + task=Task.TRANSCRIBE, + word_level_timings=word_level_timings, + model=TranscriptionModel( + model_type=ModelType.WHISPER_CPP, + whisper_model_size=WhisperModelSize.TINY, + ), + ) + + model_path = get_model_path(transcription_options.model) + transcriber = WhisperCppFileTranscriber( + task=FileTranscriptionTask( + file_path="testdata/whisper-french.mp3", + transcription_options=transcription_options, + file_transcription_options=file_transcription_options, + model_path=model_path, + ) + ) + mock_progress = Mock(side_effect=lambda value: print("progress: ", value)) + mock_completed = Mock() + mock_error = Mock() + transcriber.progress.connect(mock_progress) + transcriber.completed.connect(mock_completed) + transcriber.error.connect(mock_error) + + with qtbot.wait_signal(transcriber.completed, timeout=10 * 60 * 1000): + transcriber.run() + + mock_error.assert_not_called() + + mock_progress.assert_called() + segments = [ + segment + for segment in mock_completed.call_args[0][0] + if len(segment.text) > 0 + ] + for i, expected_segment in enumerate(expected_segments): + assert expected_segment.start == segments[i].start + assert expected_segment.end == segments[i].end + assert expected_segment.text in segments[i].text diff --git a/tests/transcriber/whisper_cpp_test.py b/tests/transcriber/whisper_cpp_test.py new file mode 100644 index 
00000000..7d9bdaa8 --- /dev/null +++ b/tests/transcriber/whisper_cpp_test.py @@ -0,0 +1,25 @@ +from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize +from buzz.transcriber.transcriber import TranscriptionOptions, Task +from buzz.transcriber.whisper_cpp import WhisperCpp, whisper_cpp_params +from tests.model_loader import get_model_path + + +class TestWhisperCpp: + def test_transcribe(self): + transcription_options = TranscriptionOptions( + model=TranscriptionModel( + model_type=ModelType.WHISPER_CPP, + whisper_model_size=WhisperModelSize.TINY, + ) + ) + model_path = get_model_path(transcription_options.model) + + whisper_cpp = WhisperCpp(model=model_path) + params = whisper_cpp_params( + language="fr", task=Task.TRANSCRIBE, word_level_timings=False + ) + result = whisper_cpp.transcribe( + audio="testdata/whisper-french.mp3", params=params + ) + + assert "Bienvenue dans Passe" in result["text"] diff --git a/tests/transcriber_test.py b/tests/transcriber/whisper_file_transcriber_test.py similarity index 61% rename from tests/transcriber_test.py rename to tests/transcriber/whisper_file_transcriber_test.py index c8132c63..8c0fbc76 100644 --- a/tests/transcriber_test.py +++ b/tests/transcriber/whisper_file_transcriber_test.py @@ -1,173 +1,30 @@ import logging import os -import pathlib import platform import shutil import sys import tempfile import time from typing import List -from unittest.mock import Mock, patch +from unittest.mock import Mock import pytest -from PyQt6.QtCore import QThread from pytestqt.qtbot import QtBot -from buzz.model_loader import WhisperModelSize, ModelType, TranscriptionModel -from buzz.transcriber import ( - FileTranscriptionOptions, - FileTranscriptionTask, +from buzz.model_loader import TranscriptionModel, ModelType, WhisperModelSize +from buzz.transcriber.transcriber import ( OutputFormat, - Segment, - Task, - WhisperCpp, - WhisperCppFileTranscriber, - WhisperFileTranscriber, get_output_file_path, - to_timestamp, - 
whisper_cpp_params, - write_output, + FileTranscriptionTask, TranscriptionOptions, - OpenAIWhisperAPIFileTranscriber, + Task, + FileTranscriptionOptions, + Segment, ) -from buzz.recording_transcriber import RecordingTranscriber -from tests.mock_sounddevice import MockInputStream +from buzz.transcriber.whisper_file_transcriber import WhisperFileTranscriber from tests.model_loader import get_model_path - -class TestRecordingTranscriber: - @pytest.mark.skip(reason="Hanging") - def test_should_transcribe(self, qtbot): - thread = QThread() - - transcription_model = TranscriptionModel( - model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY - ) - - transcriber = RecordingTranscriber( - transcription_options=TranscriptionOptions( - model=transcription_model, language="fr", task=Task.TRANSCRIBE - ), - input_device_index=0, - sample_rate=16_000, - ) - transcriber.moveToThread(thread) - - thread.finished.connect(thread.deleteLater) - - mock_transcription = Mock() - transcriber.transcription.connect(mock_transcription) - - transcriber.finished.connect(thread.quit) - transcriber.finished.connect(transcriber.deleteLater) - - with patch("sounddevice.InputStream", side_effect=MockInputStream), patch( - "sounddevice.check_input_settings" - ), qtbot.wait_signal(transcriber.transcription, timeout=60 * 1000): - thread.start() - - with qtbot.wait_signal(thread.finished, timeout=60 * 1000): - transcriber.stop_recording() - - text = mock_transcription.call_args[0][0] - assert "Bienvenue dans Passe" in text - - -class TestOpenAIWhisperAPIFileTranscriber: - @pytest.fixture - def mock_openai_client(self): - with patch("buzz.transcriber.OpenAI") as mock: - return_value = {"segments": [{"start": 0, "end": 6.56, "text": "Hello"}]} - mock.return_value.audio.transcriptions.create.return_value = return_value - mock.return_value.audio.translations.create.return_value = return_value - yield mock - - def test_transcribe(self, mock_openai_client): - file_path = 
"testdata/whisper-french.mp3" - transcriber = OpenAIWhisperAPIFileTranscriber( - task=FileTranscriptionTask( - file_path=file_path, - transcription_options=( - TranscriptionOptions( - openai_access_token=os.getenv("OPENAI_ACCESS_TOKEN"), - ) - ), - file_transcription_options=( - FileTranscriptionOptions(file_paths=[file_path]) - ), - model_path="", - ) - ) - mock_completed = Mock() - transcriber.completed.connect(mock_completed) - transcriber.run() - - called_segments = mock_completed.call_args[0][0] - - assert len(called_segments) == 1 - assert called_segments[0].start == 0 - assert called_segments[0].end == 6560 - assert called_segments[0].text == "Hello" - - -class TestWhisperCppFileTranscriber: - @pytest.mark.parametrize( - "word_level_timings,expected_segments", - [ - ( - False, - [Segment(0, 6560, "Bienvenue dans Passe-Relle. Un podcast pensé pour")], - ), - (True, [Segment(30, 330, "Bien"), Segment(330, 740, "venue")]), - ], - ) - def test_transcribe( - self, qtbot: QtBot, word_level_timings: bool, expected_segments: List[Segment] - ): - file_transcription_options = FileTranscriptionOptions( - file_paths=["testdata/whisper-french.mp3"] - ) - transcription_options = TranscriptionOptions( - language="fr", - task=Task.TRANSCRIBE, - word_level_timings=word_level_timings, - model=TranscriptionModel( - model_type=ModelType.WHISPER_CPP, - whisper_model_size=WhisperModelSize.TINY, - ), - ) - - model_path = get_model_path(transcription_options.model) - transcriber = WhisperCppFileTranscriber( - task=FileTranscriptionTask( - file_path="testdata/whisper-french.mp3", - transcription_options=transcription_options, - file_transcription_options=file_transcription_options, - model_path=model_path, - ) - ) - mock_progress = Mock(side_effect=lambda value: print("progress: ", value)) - mock_completed = Mock() - mock_error = Mock() - transcriber.progress.connect(mock_progress) - transcriber.completed.connect(mock_completed) - transcriber.error.connect(mock_error) - - with 
qtbot.wait_signal(transcriber.completed, timeout=10 * 60 * 1000): - transcriber.run() - - mock_error.assert_not_called() - - mock_progress.assert_called() - segments = [ - segment - for segment in mock_completed.call_args[0][0] - if len(segment.text) > 0 - ] - for i, expected_segment in enumerate(expected_segments): - assert expected_segment.start == segments[i].start - assert expected_segment.end == segments[i].end - assert expected_segment.text in segments[i].text +UNSUPPORTED_ON_LINUX_REASON = "Whisper not supported on Linux" class TestWhisperFileTranscriber: @@ -319,10 +176,8 @@ class TestWhisperFileTranscriber: ), ], ) - @pytest.mark.skipif( - sys.platform == "linux", reason="Avoid execstack errors on Snap" - ) - def test_transcribe( + @pytest.mark.skipif(sys.platform == "linux", reason=UNSUPPORTED_ON_LINUX_REASON) + def test_transcribe_from_file( self, qtbot: QtBot, word_level_timings: bool, @@ -338,9 +193,7 @@ class TestWhisperFileTranscriber: model=model, ) model_path = get_model_path(transcription_options.model) - file_path = os.path.abspath( - os.path.join(os.path.dirname(__file__), "..", "testdata/whisper-french.mp3") - ) + file_path = os.path.abspath("testdata/whisper-french.mp3") file_transcription_options = FileTranscriptionOptions(file_paths=[file_path]) transcriber = WhisperFileTranscriber( @@ -371,6 +224,47 @@ class TestWhisperFileTranscriber: assert len(segments[i].text) > 0 logging.debug(f"{segments[i].start} {segments[i].end} {segments[i].text}") + @pytest.mark.skipif(sys.platform == "linux", reason=UNSUPPORTED_ON_LINUX_REASON) + def test_transcribe_from_url(self, qtbot): + url = ( + "https://github.com/chidiwilliams/buzz/raw/main/testdata/whisper-french.mp3" + ) + + mock_progress = Mock() + mock_completed = Mock() + transcription_options = TranscriptionOptions() + model_path = get_model_path(transcription_options.model) + file_transcription_options = FileTranscriptionOptions(url=url) + + transcriber = WhisperFileTranscriber( + 
task=FileTranscriptionTask( + transcription_options=transcription_options, + file_transcription_options=file_transcription_options, + model_path=model_path, + url=url, + source=FileTranscriptionTask.Source.URL_IMPORT, + ) + ) + transcriber.progress.connect(mock_progress) + transcriber.completed.connect(mock_completed) + with qtbot.wait_signal( + transcriber.progress, timeout=10 * 6000 + ), qtbot.wait_signal(transcriber.completed, timeout=10 * 6000): + transcriber.run() + + # Reports progress at 0, 0 <= progress <= 100, and 100 + assert mock_progress.call_count >= 2 + assert mock_progress.call_args_list[0][0][0] == (0, 100) + + mock_completed.assert_called() + segments = mock_completed.call_args[0][0] + assert len(segments) >= 0 + for i, expected_segment in enumerate(segments): + assert segments[i].start >= 0 + assert segments[i].end > 0 + assert len(segments[i].text) > 0 + logging.debug(f"{segments[i].start} {segments[i].end} {segments[i].text}") + @pytest.mark.skipif( sys.platform == "linux", reason="Avoid execstack errors on Snap" ) @@ -445,58 +339,3 @@ class TestWhisperFileTranscriber: # Assert that file was not created assert os.path.isfile(output_file_path) is False - - -class TestToTimestamp: - def test_to_timestamp(self): - assert to_timestamp(0) == "00:00:00.000" - assert to_timestamp(123456789) == "34:17:36.789" - - -class TestWhisperCpp: - def test_transcribe(self): - transcription_options = TranscriptionOptions( - model=TranscriptionModel( - model_type=ModelType.WHISPER_CPP, - whisper_model_size=WhisperModelSize.TINY, - ) - ) - model_path = get_model_path(transcription_options.model) - - whisper_cpp = WhisperCpp(model=model_path) - params = whisper_cpp_params( - language="fr", task=Task.TRANSCRIBE, word_level_timings=False - ) - result = whisper_cpp.transcribe( - audio="testdata/whisper-french.mp3", params=params - ) - - assert "Bienvenue dans Passe" in result["text"] - - -@pytest.mark.parametrize( - "output_format,output_text", - [ - (OutputFormat.TXT, 
"Bien\nvenue dans\n"), - ( - OutputFormat.SRT, - "1\n00:00:00,040 --> 00:00:00,299\nBien\n\n2\n00:00:00,299 --> 00:00:00,329\nvenue dans\n\n", - ), - ( - OutputFormat.VTT, - "WEBVTT\n\n00:00:00.040 --> 00:00:00.299\nBien\n\n00:00:00.299 --> 00:00:00.329\nvenue dans\n\n", - ), - ], -) -def test_write_output( - tmp_path: pathlib.Path, output_format: OutputFormat, output_text: str -): - output_file_path = tmp_path / "whisper.txt" - segments = [Segment(40, 299, "Bien"), Segment(299, 329, "venue dans")] - - write_output( - path=str(output_file_path), segments=segments, output_format=output_format - ) - - output_file = open(output_file_path, "r", encoding="utf-8") - assert output_text == output_file.read() diff --git a/tests/widgets/import_url_dialog_test.py b/tests/widgets/import_url_dialog_test.py new file mode 100644 index 00000000..2f1870fb --- /dev/null +++ b/tests/widgets/import_url_dialog_test.py @@ -0,0 +1,22 @@ +from unittest.mock import patch + +from buzz.widgets.import_url_dialog import ImportURLDialog + + +class TestImportURLDialog: + def test_should_show_error_with_invalid_url(self, qtbot): + dialog = ImportURLDialog() + dialog.line_edit.setText("bad-url") + + with patch("PyQt6.QtWidgets.QMessageBox.critical") as mock_critical: + dialog.button_box.button(dialog.button_box.StandardButton.Ok).click() + mock_critical.assert_called_with( + dialog, "Invalid URL", "The URL you entered is invalid." 
+ ) + + def test_should_return_url_with_valid_url(self, qtbot): + dialog = ImportURLDialog() + dialog.line_edit.setText("https://example.com") + + dialog.button_box.button(dialog.button_box.StandardButton.Ok).click() + assert dialog.url == "https://example.com" diff --git a/tests/widgets/main_window_test.py b/tests/widgets/main_window_test.py index 08794fff..3290f0ed 100644 --- a/tests/widgets/main_window_test.py +++ b/tests/widgets/main_window_test.py @@ -4,13 +4,19 @@ from unittest.mock import patch import pytest from PyQt6.QtCore import QSize, Qt -from PyQt6.QtGui import QKeyEvent -from PyQt6.QtWidgets import QTableWidget, QMessageBox, QPushButton, QToolBar +from PyQt6.QtGui import QKeyEvent, QAction +from PyQt6.QtWidgets import ( + QTableWidget, + QMessageBox, + QPushButton, + QToolBar, + QMenuBar, +) from _pytest.fixtures import SubRequest from pytestqt.qtbot import QtBot from buzz.cache import TasksCache -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionTask, TranscriptionOptions, FileTranscriptionOptions, @@ -59,10 +65,10 @@ class TestMainWindow: assert window.windowIcon().pixmap(QSize(64, 64)).isNull() is False window.close() - def test_should_run_transcription_task(self, qtbot: QtBot, tasks_cache): + def test_should_run_file_transcription_task(self, qtbot: QtBot, tasks_cache): window = MainWindow(tasks_cache=tasks_cache) - self._start_new_transcription(window) + self.import_file_and_start_transcription(window) open_transcript_action = self._get_toolbar_action(window, "Open Transcript") assert open_transcript_action.isEnabled() is False @@ -79,11 +85,39 @@ class TestMainWindow: assert open_transcript_action.isEnabled() window.close() + def test_should_run_url_import_file_transcription_task( + self, qtbot: QtBot, tasks_cache + ): + window = MainWindow(tasks_cache=tasks_cache) + menu: QMenuBar = window.menuBar() + file_action = menu.actions()[0] + import_url_action: QAction = file_action.menu().actions()[1] + + with 
patch( + "buzz.widgets.import_url_dialog.ImportURLDialog.prompt" + ) as prompt_mock: + prompt_mock.return_value = "https://github.com/chidiwilliams/buzz/raw/main/testdata/whisper-french.mp3" + import_url_action.trigger() + + file_transcriber_widget: FileTranscriberWidget = window.findChild( + FileTranscriberWidget + ) + run_button: QPushButton = file_transcriber_widget.findChild(QPushButton) + run_button.click() + + table_widget: QTableWidget = window.findChild(QTableWidget) + qtbot.wait_until( + self.get_assert_task_status_callback(table_widget, 0, "Completed"), + timeout=2 * 60 * 1000, + ) + + window.close() + def test_should_run_and_cancel_transcription_task(self, qtbot, tasks_cache): window = MainWindow(tasks_cache=tasks_cache) qtbot.add_widget(window) - self._start_new_transcription(window, long_audio=True) + self.import_file_and_start_transcription(window, long_audio=True) table_widget: QTableWidget = window.findChild(QTableWidget) @@ -204,7 +238,9 @@ class TestMainWindow: window.close() @staticmethod - def _start_new_transcription(window: MainWindow, long_audio: bool = False): + def import_file_and_start_transcription( + window: MainWindow, long_audio: bool = False + ): with patch( "PyQt6.QtWidgets.QFileDialog.getOpenFileNames" ) as open_file_names_mock: diff --git a/tests/widgets/preferences_dialog/folder_watch_preferences_widget_test.py b/tests/widgets/preferences_dialog/folder_watch_preferences_widget_test.py index 212ba173..5e052bb2 100644 --- a/tests/widgets/preferences_dialog/folder_watch_preferences_widget_test.py +++ b/tests/widgets/preferences_dialog/folder_watch_preferences_widget_test.py @@ -3,7 +3,7 @@ from unittest.mock import Mock from PyQt6.QtWidgets import QCheckBox, QLineEdit from buzz.model_loader import TranscriptionModel -from buzz.transcriber import Task, DEFAULT_WHISPER_TEMPERATURE +from buzz.transcriber.transcriber import Task, DEFAULT_WHISPER_TEMPERATURE from buzz.widgets.preferences_dialog.folder_watch_preferences_widget import ( 
FolderWatchPreferencesWidget, ) diff --git a/tests/widgets/shortcuts_editor_widget_test.py b/tests/widgets/shortcuts_editor_widget_test.py index ba833fcf..f92c773c 100644 --- a/tests/widgets/shortcuts_editor_widget_test.py +++ b/tests/widgets/shortcuts_editor_widget_test.py @@ -24,6 +24,7 @@ class TestShortcutsEditorWidget: expected = ( ("Open Record Window", "Ctrl+R"), ("Import File", "Ctrl+O"), + ("Import URL", "Ctrl+U"), ("Open Preferences Window", "Ctrl+,"), ("Open Transcript Viewer", "Ctrl+E"), ("Clear History", "Ctrl+S"), diff --git a/tests/widgets/transcription_task_folder_watcher_test.py b/tests/widgets/transcription_task_folder_watcher_test.py index 45554bf0..2cb37a00 100644 --- a/tests/widgets/transcription_task_folder_watcher_test.py +++ b/tests/widgets/transcription_task_folder_watcher_test.py @@ -5,7 +5,7 @@ from tempfile import mkdtemp from pytestqt.qtbot import QtBot from buzz.model_loader import TranscriptionModel, ModelType -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( Task, DEFAULT_WHISPER_TEMPERATURE, FileTranscriptionTask, diff --git a/tests/widgets/transcription_tasks_table_widget_test.py b/tests/widgets/transcription_tasks_table_widget_test.py index 31356536..99b4bd39 100644 --- a/tests/widgets/transcription_tasks_table_widget_test.py +++ b/tests/widgets/transcription_tasks_table_widget_test.py @@ -2,7 +2,7 @@ import datetime from pytestqt.qtbot import QtBot -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( FileTranscriptionTask, TranscriptionOptions, FileTranscriptionOptions, diff --git a/tests/widgets/transcription_viewer_test.py b/tests/widgets/transcription_viewer_test.py index 2de2603c..e9bd73f2 100644 --- a/tests/widgets/transcription_viewer_test.py +++ b/tests/widgets/transcription_viewer_test.py @@ -5,7 +5,7 @@ import pytest from PyQt6.QtWidgets import QPushButton, QToolBar from pytestqt.qtbot import QtBot -from buzz.transcriber import ( +from buzz.transcriber.transcriber import ( 
FileTranscriptionTask, FileTranscriptionOptions, TranscriptionOptions,