mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 14:45:46 +01:00
Update documentation (#1337)
This commit is contained in:
parent
85d70c1e64
commit
43214f5c3d
14 changed files with 72 additions and 40 deletions
|
|
@ -52,6 +52,7 @@ Linux versions get also pushed to the snap. To install latest development versio
|
|||
sudo apt-get install --no-install-recommends libyaml-dev libtbb-dev libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-shape0 libxcb-cursor0 libportaudio2 gettext libpulse0 ffmpeg
|
||||
```
|
||||
On versions prior to Ubuntu 24.04, also run `sudo apt-get install --no-install-recommends libegl1-mesa`
|
||||
|
||||
5. Install the dependencies `uv sync`
|
||||
6. Run Buzz `uv run buzz`
|
||||
|
||||
|
|
|
|||
23
README.md
23
README.md
|
|
@ -13,6 +13,24 @@ OpenAI's [Whisper](https://github.com/openai/whisper).
|
|||

|
||||
[](https://GitHub.com/chidiwilliams/buzz/releases/)
|
||||
|
||||

|
||||
|
||||
## Features
|
||||
- Transcribe audio and video files or YouTube links
|
||||
- Live realtime audio transcription from microphone
|
||||
- Presentation window for easy accessibility during events and presentations
|
||||
- Speech separation before transcription for better accuracy on noisy audio
|
||||
- Speaker identification in transcribed media
|
||||
- Support for multiple Whisper backends
|
||||
- CUDA acceleration support for Nvidia GPUs
|
||||
- Apple Silicon support for Macs
|
||||
- Vulkan acceleration support for Whisper.cpp on most GPUs, including integrated GPUs
|
||||
- Export transcripts to TXT, SRT, and VTT
|
||||
- Advanced Transcription Viewer with search, playback controls, and speed adjustment
|
||||
- Keyboard shortcuts for efficient navigation
|
||||
- Watch folder for automatic transcription of new files
|
||||
- Command-Line Interface for scripting and automation
|
||||
|
||||
## Installation
|
||||
|
||||
### macOS
|
||||
|
|
@ -40,13 +58,16 @@ To install flatpak, run:
|
|||
flatpak install flathub io.github.chidiwilliams.Buzz
|
||||
```
|
||||
|
||||
[](https://flathub.org/en/apps/io.github.chidiwilliams.Buzz)
|
||||
|
||||
To install snap, run:
|
||||
```shell
|
||||
sudo apt-get install libportaudio2 libcanberra-gtk-module libcanberra-gtk3-module
|
||||
sudo snap install buzz
|
||||
sudo snap connect buzz:password-manager-service
|
||||
```
|
||||
|
||||
[](https://snapcraft.io/buzz)
|
||||
|
||||
### PyPI
|
||||
|
||||
Install [ffmpeg](https://www.ffmpeg.org/download.html)
|
||||
|
|
|
|||
|
|
@ -70,9 +70,8 @@ def _setup_windows_dll_directories():
|
|||
for lib_dir in lib_dirs:
|
||||
try:
|
||||
os.add_dll_directory(str(lib_dir))
|
||||
logger.debug(f"Added DLL directory: {lib_dir}")
|
||||
except (OSError, AttributeError) as e:
|
||||
logger.debug(f"Could not add DLL directory {lib_dir}: {e}")
|
||||
pass
|
||||
|
||||
|
||||
def _preload_linux_libraries():
|
||||
|
|
@ -101,17 +100,15 @@ def _preload_linux_libraries():
|
|||
|
||||
# Skip problematic libraries
|
||||
if any(pattern in lib_file.name for pattern in skip_patterns):
|
||||
logger.debug(f"Skipping library: {lib_file}")
|
||||
continue
|
||||
|
||||
try:
|
||||
# Use RTLD_GLOBAL so symbols are available to other libraries
|
||||
ctypes.CDLL(str(lib_file), mode=ctypes.RTLD_GLOBAL)
|
||||
loaded_libs.add(lib_file.name)
|
||||
logger.debug(f"Preloaded library: {lib_file}")
|
||||
except OSError as e:
|
||||
# Some libraries may have missing dependencies, that's ok
|
||||
logger.debug(f"Could not preload {lib_file}: {e}")
|
||||
pass
|
||||
|
||||
|
||||
def setup_cuda_libraries():
|
||||
|
|
|
|||
|
|
@ -123,6 +123,8 @@ class FileTranscriberQueueWorker(QObject):
|
|||
def separator_progress_callback(progress):
|
||||
self.task_progress.emit(self.current_task, int(progress["segment_offset"] * 100) / int(progress["audio_length"] * 100))
|
||||
|
||||
separator = None
|
||||
separated = None
|
||||
try:
|
||||
separator = demucsApi.Separator(
|
||||
progress=True,
|
||||
|
|
@ -137,6 +139,15 @@ class FileTranscriberQueueWorker(QObject):
|
|||
self.current_task.file_path = str(self.speech_path)
|
||||
except Exception as e:
|
||||
logging.error(f"Error during speech extraction: {e}", exc_info=True)
|
||||
finally:
|
||||
# Release memory used by speech extractor
|
||||
del separator, separated
|
||||
try:
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
logging.debug("Starting next transcription task")
|
||||
self.task_progress.emit(self.current_task, 0)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ import subprocess
|
|||
from typing import Optional
|
||||
from platformdirs import user_cache_dir
|
||||
|
||||
# Preload CUDA libraries before importing torch
|
||||
from buzz import cuda_setup # noqa: F401
|
||||
|
||||
import torch
|
||||
import numpy as np
|
||||
import sounddevice
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ import multiprocessing
|
|||
import re
|
||||
import os
|
||||
import sys
|
||||
|
||||
# Preload CUDA libraries before importing torch - required for subprocess contexts
|
||||
from buzz import cuda_setup # noqa: F401
|
||||
|
||||
import torch
|
||||
import platform
|
||||
import subprocess
|
||||
|
|
@ -123,10 +127,6 @@ class WhisperFileTranscriber(FileTranscriber):
|
|||
def transcribe_whisper(
|
||||
cls, stderr_conn: Connection, task: FileTranscriptionTask
|
||||
) -> None:
|
||||
# Preload CUDA libraries in the subprocess - must be done before importing torch
|
||||
# This is needed because multiprocessing creates a fresh process without the main process's preloaded libraries
|
||||
from buzz import cuda_setup # noqa: F401
|
||||
|
||||
# Patch subprocess on Windows to prevent console window flash
|
||||
# This is needed because multiprocessing spawns a new process without the main process patches
|
||||
if sys.platform == "win32":
|
||||
|
|
|
|||
|
|
@ -3,6 +3,10 @@ import sys
|
|||
import logging
|
||||
import platform
|
||||
import numpy as np
|
||||
|
||||
# Preload CUDA libraries before importing torch
|
||||
from buzz import cuda_setup # noqa: F401
|
||||
|
||||
import torch
|
||||
import requests
|
||||
from typing import Union
|
||||
|
|
@ -225,7 +229,7 @@ class TransformersTranscriber:
|
|||
model, processor, use_8bit = self._load_peft_model(device, torch_dtype)
|
||||
else:
|
||||
use_safetensors = True
|
||||
if os.path.exists(self.model_id):
|
||||
if os.path.isdir(self.model_id):
|
||||
safetensors_files = [f for f in os.listdir(self.model_id) if f.endswith(".safetensors")]
|
||||
use_safetensors = len(safetensors_files) > 0
|
||||
|
||||
|
|
|
|||
|
|
@ -11,9 +11,7 @@ The models are stored:
|
|||
- Mac OS: `~/Library/Caches/Buzz`
|
||||
- Windows: `%USERPROFILE%\AppData\Local\Buzz\Buzz\Cache`
|
||||
|
||||
Paste the location in your file manager to access the models.
|
||||
|
||||
Since Version `1.3.4`, to get to the logs folder go to `Help -> About Buzz` and click on `Show logs` button.
|
||||
Paste the location into your file manager to access the models, or go to `Help -> Preferences -> Models` and click the `Show file location` button after downloading a model.
|
||||
|
||||
### 2. What can I try if the transcription runs too slowly?
|
||||
|
||||
|
|
@ -67,7 +65,7 @@ Yes, Buzz can be used without internet connection if you download the necessary
|
|||
|
||||
If a model download was incomplete or corrupted, Buzz may crash. Try to delete the downloaded model files in `Help -> Preferences -> Models` and re-download them.
|
||||
|
||||
If that does not help, check the log file for errors and [report the issue](https://github.com/chidiwilliams/buzz/issues) so we can fix it. The log file is located in `~/Library/Logs/Buzz` (Mac OS) or `%USERPROFILE%\AppData\Local\Buzz\Buzz\Logs` (Windows). On Linux run the Buzz from the command line to see the relevant messages.
|
||||
If that does not help, check the log file for errors and [report the issue](https://github.com/chidiwilliams/buzz/issues) so we can fix it. If possible, attach the log file to the issue. Since version `1.3.4`, you can open the logs folder by going to `Help -> About Buzz` and clicking the `Show logs` button.
|
||||
|
||||
### 9. Where can I get the latest development version?
|
||||
|
||||
|
|
|
|||
|
|
@ -18,24 +18,16 @@ OpenAI's [Whisper](https://github.com/openai/whisper).
|
|||
VTT ([Demo](https://www.loom.com/share/cf263b099ac3481082bb56d19b7c87fe))
|
||||
- Transcription and translation from your computer's microphones to text (Resource-intensive and may not be
|
||||
real-time, [Demo](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7))
|
||||
- **Advanced Transcription Viewer** with search, playback controls, and speed adjustment
|
||||
- Presentation window for easy accessibility during events and presentations
|
||||
- [Realtime translation](https://chidiwilliams.github.io/buzz/docs/usage/translations) with OpenAI API compatible AI
|
||||
- [Advanced Transcription Viewer](https://chidiwilliams.github.io/buzz/docs/usage/transcription_viewer) with search, playback controls, and speed adjustment
|
||||
- **Smart Interface** with conditional visibility and state persistence
|
||||
- **Professional Controls** including loop segments, follow audio, and keyboard shortcuts
|
||||
- Supports [Whisper](https://github.com/openai/whisper#available-models-and-languages),
|
||||
[Whisper.cpp](https://github.com/ggerganov/whisper.cpp), [Faster Whisper](https://github.com/guillaumekln/faster-whisper),
|
||||
[Whisper.cpp](https://github.com/ggerganov/whisper.cpp) (with Vulkan GPU acceleration), [Faster Whisper](https://github.com/guillaumekln/faster-whisper),
|
||||
[Whisper-compatible Hugging Face models](https://huggingface.co/models?other=whisper), and
|
||||
the [OpenAI Whisper API](https://platform.openai.com/docs/api-reference/introduction)
|
||||
- [Command-Line Interface](#command-line-interface)
|
||||
- Available on Mac, Windows, and Linux
|
||||
|
||||
## Transcription Viewer
|
||||
|
||||
Buzz features a powerful transcription viewer that makes it easy to work with your transcriptions:
|
||||
|
||||
- **🔍 Smart Search**: Find text quickly with real-time search and navigation
|
||||
- **🎵 Playback Controls**: Loop segments, follow audio, and adjust playback speed
|
||||
- **⌨️ Keyboard Shortcuts**: Efficient navigation with Ctrl+F, Ctrl+L, and more
|
||||
- **🎨 Clean Interface**: Conditional visibility keeps the interface uncluttered
|
||||
- **💾 State Persistence**: Remembers your preferences between sessions
|
||||
|
||||
[Learn more about the Transcription Viewer →](https://chidiwilliams.github.io/buzz/docs/usage/transcription_viewer)
|
||||
- Speech separation before transcription for better accuracy on noisy audio
|
||||
- [Speaker identification](https://chidiwilliams.github.io/buzz/docs/usage/speaker_identification) in transcribed media
|
||||
- Available on Mac, Windows, and Linux
|
||||
|
|
@ -3,8 +3,8 @@ title: Installation
|
|||
sidebar_position: 2
|
||||
---
|
||||
|
||||
To install Buzz, download the [latest version](https://github.com/chidiwilliams/buzz/releases/latest) for your operating
|
||||
system. Buzz is available on **Mac** (Intel), **Windows**, and **Linux**.
|
||||
To install Buzz, download the latest version for your operating
|
||||
system. Buzz is available on **Mac** (Intel and Apple silicon), **Windows**, and **Linux**.
|
||||
|
||||
### macOS
|
||||
|
||||
|
|
@ -25,6 +25,8 @@ To install flatpak, run:
|
|||
flatpak install flathub io.github.chidiwilliams.Buzz
|
||||
```
|
||||
|
||||
[](https://flathub.org/en/apps/io.github.chidiwilliams.Buzz)
|
||||
|
||||
To install snap, run:
|
||||
```shell
|
||||
sudo apt-get install libportaudio2 libcanberra-gtk-module libcanberra-gtk3-module
|
||||
|
|
@ -34,15 +36,15 @@ sudo snap connect buzz:password-manager-service
|
|||
|
||||
[](https://snapcraft.io/buzz)
|
||||
|
||||
Alternatively, on Ubuntu 20.04 and later, install the dependencies:
|
||||
|
||||
```shell
|
||||
sudo apt-get install libportaudio2
|
||||
```
|
||||
|
||||
## PyPI
|
||||
|
||||
```shell
|
||||
pip install buzz-captions
|
||||
python -m buzz
|
||||
```
|
||||
|
||||
On Linux, install any system dependencies you may be missing:
|
||||
```
|
||||
sudo apt-get install --no-install-recommends libyaml-dev libtbb-dev libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-shape0 libxcb-cursor0 libportaudio2 gettext libpulse0 ffmpeg
|
||||
```
|
||||
On versions prior to Ubuntu 24.04, also run `sudo apt-get install --no-install-recommends libegl1-mesa`
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ To start a live recording:
|
|||
|
||||
[](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7 "Live Recording on Buzz")
|
||||
|
||||
**Presentation Window** Since version 1.4.2, Buzz has an easy-to-use presentation window that you can use to show live transcriptions during events and presentations. To open it, start the recording, and new options for the `Presentation window` will appear.
|
||||
|
||||
### Record audio playing from computer (macOS)
|
||||
|
||||
To record audio playing from an application on your computer, you may install an audio loopback driver (a program that
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ description = ""
|
|||
authors = [{ name = "Chidi Williams", email = "williamschidi1@gmail.com" }]
|
||||
requires-python = ">=3.12,<3.13"
|
||||
readme = "README.md"
|
||||
# Do not change the license format: doing so would remove the PyPI warning, but it causes the snap build to fail
|
||||
license = { text = "MIT" }
|
||||
dependencies = [
|
||||
"sounddevice>=0.5.3,<0.6",
|
||||
|
|
@ -180,7 +181,7 @@ sources = {"demucs_repo/demucs" = "demucs"}
|
|||
[tool.hatch.build.hooks.custom]
|
||||
|
||||
[build-system]
|
||||
requires = ["hatchling", "cmake>=4.2.0,<5", "polib>=1.2.0,<2", "pybind11", "setuptools>=42"]
|
||||
requires = ["hatchling", "cmake>=4.2.0,<5", "polib>=1.2.0,<2", "pybind11", "setuptools>=80.9.0"]
|
||||
build-backend = "hatchling.build"
|
||||
|
||||
[tool.ruff]
|
||||
|
|
|
|||
|
|
@ -46,7 +46,6 @@ brew install --cask buzz
|
|||
```shell
|
||||
sudo apt-get install libportaudio2 libcanberra-gtk-module libcanberra-gtk3-module
|
||||
sudo snap install buzz
|
||||
sudo snap connect buzz:password-manager-service
|
||||
```
|
||||
|
||||
### 最新开发者版本
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@
|
|||
<url type="bugtracker">https://github.com/chidiwilliams/buzz/issues</url>
|
||||
<url type="homepage">https://github.com/chidiwilliams/buzz</url>
|
||||
<url type="faq">https://chidiwilliams.github.io/buzz/docs</url>
|
||||
<url type="vcs-browser">https://github.com/chidiwilliams/buzz</url>
|
||||
|
||||
<branding>
|
||||
<color type="primary" scheme_preference="light">#f66151</color>
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue