Compare commits

..

No commits in common. "main" and "v0.5.8" have entirely different histories.

310 changed files with 4117 additions and 76222 deletions

View file

@ -1,19 +1,11 @@
[run]
omit =
buzz/whisper_cpp/*
buzz/transcriber/local_whisper_cpp_server_transcriber.py
whisper_cpp.py
*_test.py
demucs/*
whisper_diarization/*
deepmultilingualpunctuation/*
ctc_forced_aligner/*
[report]
exclude_also =
if sys.platform == "win32":
if platform.system\(\) == "Windows":
if platform.system\(\) == "Linux":
if platform.system\(\) == "Darwin":
stable_ts/*
[html]
directory = coverage/html
[report]
fail_under = 75

View file

@ -1,10 +1,11 @@
---
name: CI
on:
push:
branches:
- main
tags:
- "*"
- '*'
pull_request:
concurrency:
@ -14,389 +15,120 @@ concurrency:
jobs:
test:
runs-on: ${{ matrix.os }}
env:
BUZZ_DISABLE_TELEMETRY: true
strategy:
fail-fast: false
matrix:
include:
- os: macos-15-intel
- os: macos-latest
- os: windows-latest
- os: ubuntu-22.04
- os: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: recursive
# Should be removed with next update to whisper.cpp
- name: Downgrade Xcode
uses: maxim-lobanov/setup-xcode@v1
- uses: actions/setup-python@v4
with:
xcode-version: '16.0.0'
if: matrix.os == 'macos-latest'
python-version: '3.9.13'
- name: Set up Python
uses: actions/setup-python@v5
- name: Install Poetry Action
uses: snok/install-poetry@v1.3.1
with:
python-version: "3.12"
- name: Install Vulkan SDK
if: "startsWith(matrix.os, 'ubuntu-') || matrix.os == 'windows-latest'"
uses: humbletim/install-vulkan-sdk@v1.2
with:
version: 1.4.309.0
cache: true
- name: Install uv
uses: astral-sh/setup-uv@v6
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
id: cached-uv-dependencies
uses: actions/cache@v4
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/uv.lock') }}
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
- uses: AnimMouse/setup-ffmpeg@v1
- name: Load cached Whisper models
id: cached-whisper-models
uses: actions/cache@v3
with:
path: |
~/Library/Caches/Buzz
~/.cache/whisper
key: whisper-models-${{ runner.os }}
- uses: FedericoCarboni/setup-ffmpeg@v1
id: setup-ffmpeg
with:
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}
- name: Test ffmpeg
run: ffmpeg -i ./testdata/audio-long.mp3 ./testdata/audio-long.wav
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v2
if: runner.os == 'Windows'
- name: Install apt dependencies
run: |
sudo apt-get update
if [ "$(lsb_release -rs)" == "22.04" ]; then
sudo apt-get install libegl1-mesa
# Add ubuntu-toolchain-r PPA for newer libstdc++6 with GLIBCXX_3.4.32
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
sudo apt-get update
sudo apt-get install -y libstdc++6
fi
sudo apt-get install libyaml-dev libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-shape0 libxcb-cursor0 libportaudio2 gettext libpulse0 libgl1-mesa-dev libvulkan-dev ccache
if: "startsWith(matrix.os, 'ubuntu-')"
- name: Install dependencies
run: uv sync
token: ${{ secrets.GITHUB_TOKEN }}
- run: poetry install
- name: Test
run: |
uv run make test
shell: bash
env:
PYTHONFAULTHANDLER: "1"
- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v4
with:
flags: ${{ runner.os }}
token: ${{ secrets.CODECOV_TOKEN }}
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
run: poetry run make test
build:
runs-on: ${{ matrix.os }}
timeout-minutes: 90
env:
BUZZ_DISABLE_TELEMETRY: true
strategy:
fail-fast: false
matrix:
include:
- os: macos-15-intel
- os: macos-latest
- os: ubuntu-latest
- os: windows-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: recursive
# Should be removed with next update to whisper.cpp
- name: Downgrade Xcode
uses: maxim-lobanov/setup-xcode@v1
- uses: actions/setup-python@v4
with:
xcode-version: '16.0.0'
if: matrix.os == 'macos-latest'
python-version: '3.9.13'
- name: Set up Python
uses: actions/setup-python@v5
- name: Install Poetry Action
uses: snok/install-poetry@v1.3.1
with:
python-version: "3.12"
- name: Install Vulkan SDK
if: "startsWith(matrix.os, 'ubuntu-') || matrix.os == 'windows-latest'"
uses: humbletim/install-vulkan-sdk@v1.2
with:
version: 1.4.309.0
cache: true
- name: Install uv
uses: astral-sh/setup-uv@v6
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
id: cached-uv-dependencies
uses: actions/cache@v4
id: cached-poetry-dependencies
uses: actions/cache@v3
with:
path: .venv
key: venv-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('**/uv.lock') }}
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
- name: Install Inno Setup on Windows
uses: crazy-max/ghaction-chocolatey@v3
with:
args: install innosetup --yes
if: runner.os == 'Windows'
- name: Install apt dependencies
run: |
sudo apt-get update
if [ "$(lsb_release -rs)" == "22.04" ]; then
sudo apt-get install libegl1-mesa
# Add ubuntu-toolchain-r PPA for newer libstdc++6 with GLIBCXX_3.4.32
sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
sudo apt-get update
sudo apt-get install -y libstdc++6
fi
sudo apt-get install libyaml-dev libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-shape0 libxcb-cursor0 libportaudio2 gettext libpulse0 libgl1-mesa-dev libvulkan-dev ccache
if: "startsWith(matrix.os, 'ubuntu-')"
- name: Install dependencies
run: uv sync
- uses: AnimMouse/setup-ffmpeg@v1
- uses: FedericoCarboni/setup-ffmpeg@v1
id: setup-ffmpeg
with:
version: ${{ matrix.os == 'macos-15-intel' && '7.1.1' || matrix.os == 'macos-latest' && '80' || '8.0' }}
- name: Install MSVC for Windows
run: |
if [ "$RUNNER_OS" == "Windows" ]; then
uv add msvc-runtime
uv pip install -U torch==2.8.0+cu129 torchaudio==2.8.0+cu129 --index-url https://download.pytorch.org/whl/cu129
uv pip install nvidia-cublas-cu12==12.9.1.4 nvidia-cuda-cupti-cu12==12.9.79 nvidia-cuda-runtime-cu12==12.9.79 --extra-index-url https://pypi.ngc.nvidia.com
uv cache clean
uv run pip cache purge
fi
shell: bash
- name: Add msbuild to PATH
uses: microsoft/setup-msbuild@v2
if: runner.os == 'Windows'
- uses: ruby/setup-ruby@v1
with:
ruby-version: "3.0"
bundler-cache: true
if: "startsWith(matrix.os, 'ubuntu-')"
- name: Install FPM
run: gem install fpm
if: "startsWith(matrix.os, 'ubuntu-')"
- name: Clear space on Windows
if: runner.os == 'Windows'
run: |
rm 'C:\Android\android-sdk\' -r -force
rm 'C:\Program Files (x86)\Google\' -r -force
rm 'C:\tools\kotlinc\' -r -force
rm 'C:\tools\php\' -r -force
rm 'C:\selenium\' -r -force
shell: pwsh
token: ${{ secrets.GITHUB_TOKEN }}
- run: poetry install
- name: Bundle
run: |
if [ "$RUNNER_OS" == "macOS" ]; then
brew install create-dmg
sudo pkill -9 XProtect >/dev/null || true;
while pgrep XProtect; do sleep 3; done;
CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12
KEYCHAIN_PATH=$RUNNER_TEMP/app-signing.keychain-db
echo -n "$BUILD_CERTIFICATE_BASE64" | base64 --decode -o $CERTIFICATE_PATH
security create-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH
security set-keychain-settings -lut 21600 $KEYCHAIN_PATH
security unlock-keychain -p "$KEYCHAIN_PASSWORD" $KEYCHAIN_PATH
security import $CERTIFICATE_PATH -P "$P12_PASSWORD" -A -t cert -f pkcs12 -k $KEYCHAIN_PATH
security list-keychain -d user -s $KEYCHAIN_PATH
xcrun notarytool store-credentials --apple-id "$APPLE_ID" --password "$APPLE_APP_PASSWORD" --team-id "$APPLE_TEAM_ID" notarytool --validate
uv run make bundle_mac
poetry run make bundle_mac
elif [ "$RUNNER_OS" == "Windows" ]; then
cp -r ./dll_backup ./buzz/
uv run make bundle_windows
poetry run make dist/Buzz.exe dist/Buzz-windows.exe
elif [ "$RUNNER_OS" == "Linux" ]; then
poetry run make bundle_linux
fi
env:
BUZZ_CODESIGN_IDENTITY: ${{ secrets.BUZZ_CODESIGN_IDENTITY }}
BUZZ_KEYCHAIN_NOTARY_PROFILE: ${{ secrets.BUZZ_KEYCHAIN_NOTARY_PROFILE }}
BUILD_CERTIFICATE_BASE64: ${{ secrets.BUILD_CERTIFICATE_BASE64 }}
KEYCHAIN_PASSWORD: ${{ secrets.KEYCHAIN_PASSWORD }}
P12_PASSWORD: ${{ secrets.P12_PASSWORD }}
APPLE_ID: ${{ secrets.APPLE_ID }}
APPLE_APP_PASSWORD: ${{ secrets.APPLE_APP_PASSWORD }}
APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
BUZZ_VERSION: ${{ github.ref_name }}
shell: bash
- uses: actions/upload-artifact@v4
- uses: actions/upload-artifact@v3
with:
name: Buzz-${{ runner.os }}-${{ runner.arch }}
name: Buzz-${{ runner.os }}
path: |
dist/Buzz*-windows.exe
dist/Buzz*-windows-*.bin
dist/Buzz*-mac.dmg
build_wheels:
runs-on: ${{ matrix.os }}
env:
BUZZ_DISABLE_TELEMETRY: true
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-15-intel, macos-latest]
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
# Should be removed with next update to whisper.cpp
- name: Downgrade Xcode
uses: maxim-lobanov/setup-xcode@v1
with:
xcode-version: '16.0.0'
if: matrix.os == 'macos-latest'
- name: Install Vulkan SDK
if: "startsWith(matrix.os, 'ubuntu-') || matrix.os == 'windows-latest'"
uses: humbletim/install-vulkan-sdk@v1.2
with:
version: 1.4.309.0
cache: true
- name: Install uv
uses: astral-sh/setup-uv@v6
- name: Build wheels
run: uv build --wheel
shell: bash
- uses: actions/upload-artifact@v4
with:
name: buzz-wheel-${{ runner.os }}-${{ runner.arch }}
path: ./dist/*.whl
publish_pypi:
needs: [build_wheels, test]
runs-on: ubuntu-latest
env:
BUZZ_DISABLE_TELEMETRY: true
environment: pypi
permissions:
id-token: write
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/download-artifact@v4
with:
pattern: buzz-wheel-*
path: dist
merge-multiple: true
- uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true
password: ${{ secrets.PYPI_TOKEN }}
dist/Buzz*.tar.gz
dist/Buzz*.zip
dist/Buzz*.exe
release:
runs-on: ${{ matrix.os }}
env:
BUZZ_DISABLE_TELEMETRY: true
strategy:
fail-fast: false
matrix:
include:
- os: macos-15-intel
- os: macos-latest
- os: windows-latest
runs-on: ubuntu-latest
needs: [build, test]
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v3
with:
submodules: recursive
- uses: actions/download-artifact@v4
name: Buzz-Linux
- uses: actions/download-artifact@v3
with:
name: Buzz-${{ runner.os }}-${{ runner.arch }}
- name: Rename .dmg files
if: runner.os == 'macOS'
run: |
for file in Buzz*.dmg; do
mv "$file" "${file%.dmg}-${{ runner.arch }}.dmg"
done
name: Buzz-Windows
- name: Release
uses: softprops/action-gh-release@v2
uses: softprops/action-gh-release@v1
with:
files: |
Buzz*-unix.tar.gz
Buzz*.exe
Buzz*.bin
Buzz*.tar.gz
Buzz*.dmg
# Brew Cask deployment fails and the app is deprecated on Brew.
# deploy_brew_cask:
# runs-on: macos-latest
# env:
# BUZZ_DISABLE_TELEMETRY: true
# needs: [release]
# if: startsWith(github.ref, 'refs/tags/')
# steps:
# - uses: actions/checkout@v4
# with:
# submodules: recursive
#
# # Should be removed with next update to whisper.cpp
# - name: Downgrade Xcode
# uses: maxim-lobanov/setup-xcode@v1
# with:
# xcode-version: '16.0.0'
# if: matrix.os == 'macos-latest'
#
# - name: Install uv
# uses: astral-sh/setup-uv@v6
#
# - name: Set up Python
# uses: actions/setup-python@v5
# with:
# python-version: "3.12"
#
# - name: Install dependencies
# run: uv sync
#
# - name: Upload to Brew
# run: uv run make upload_brew
# env:
# HOMEBREW_GITHUB_API_TOKEN: ${{ secrets.HOMEBREW_GITHUB_API_TOKEN }}
Buzz.exe

View file

@ -1,32 +0,0 @@
---
name: GitHub Pages
on:
push:
branches:
- main
jobs:
deploy:
name: Deploy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-node@v4
with:
node-version: 18
cache: npm
cache-dependency-path: docs/package-lock.json
- name: Install dependencies
run: npm ci
working-directory: docs
- name: Build
run: npm run build
working-directory: docs
- name: Deploy to GitHub Pages
uses: peaceiris/actions-gh-pages@v4
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./docs/build

View file

@ -1,94 +0,0 @@
---
name: Manual Build
on: workflow_dispatch
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ${{ matrix.os }}
env:
BUZZ_DISABLE_TELEMETRY: true
strategy:
fail-fast: false
matrix:
include:
- os: macos-latest
- os: windows-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: actions/setup-python@v5
with:
python-version: "3.11.9"
- name: Install Poetry Action
uses: snok/install-poetry@v1.3.1
with:
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v4
with:
path: .venv
key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
- uses: FedericoCarboni/setup-ffmpeg@v3.1
id: setup-ffmpeg
with:
ffmpeg-version: release
architecture: 'x64'
github-token: ${{ github.server_url == 'https://github.com' && github.token || '' }}
- name: Install dependencies
run: poetry install
- name: Bundle
run: |
if [ "$RUNNER_OS" == "macOS" ]; then
brew install create-dmg
poetry run make bundle_mac_unsigned
elif [ "$RUNNER_OS" == "Windows" ]; then
poetry run make bundle_windows
fi
shell: bash
- uses: actions/upload-artifact@v4
with:
name: Buzz-${{ runner.os }}
path: |
dist/Buzz*-windows.exe
dist/Buzz*-mac.dmg
build-snap:
runs-on: ubuntu-latest
env:
BUZZ_DISABLE_TELEMETRY: true
outputs:
snap: ${{ steps.snapcraft.outputs.snap }}
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- uses: snapcore/action-build@v1
id: snapcraft
- run: |
sudo apt-get update
sudo apt-get install libportaudio2
- run: sudo snap install --devmode *.snap
- run: |
cd $HOME
xvfb-run buzz --version
- uses: actions/upload-artifact@v4
with:
name: snap
path: ${{ steps.snapcraft.outputs.snap }}

View file

@ -1,106 +0,0 @@
---
name: Snapcraft
on:
push:
branches:
- main
tags:
- "*"
pull_request:
concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
jobs:
build:
runs-on: ubuntu-24.04
timeout-minutes: 90
env:
BUZZ_DISABLE_TELEMETRY: true
outputs:
snap: ${{ steps.snapcraft.outputs.snap }}
steps:
# Ideas from https://github.com/orgs/community/discussions/25678
- name: Remove unused build tools
run: |
sudo apt-get remove -y azure-cli google-cloud-sdk hhvm google-chrome-stable firefox powershell mono-devel || true
sudo apt-get autoremove -y
sudo apt-get clean
python -m pip cache purge
rm -rf /opt/hostedtoolcache || true
- name: Check available disk space
run: |
echo "=== Disk space ==="
df -h
echo "=== Memory ==="
free -h
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Snapcraft and dependencies
run: |
set -x
# Ensure snapd is ready
sudo systemctl start snapd.socket
sudo snap wait system seed.loaded
echo "=== Installing snapcraft ==="
sudo snap install --classic snapcraft
echo "=== Installing gnome extension dependencies ==="
sudo snap install gnome-46-2404 || { echo "Failed to install gnome-46-2404"; sudo journalctl -u snapd --no-pager -n 50; exit 1; }
sudo snap install gnome-46-2404-sdk || { echo "Failed to install gnome-46-2404-sdk"; sudo journalctl -u snapd --no-pager -n 50; exit 1; }
echo "=== Installing build-snaps ==="
sudo snap install --classic astral-uv || { echo "Failed to install astral-uv"; sudo journalctl -u snapd --no-pager -n 50; exit 1; }
echo "=== Installed snaps ==="
snap list
- name: Check disk space before build
run: df -h
- name: Build snap
id: snapcraft
env:
SNAPCRAFT_BUILD_ENVIRONMENT: host
run: |
sudo -E snapcraft pack --verbose --destructive-mode
echo "snap=$(ls *.snap)" >> $GITHUB_OUTPUT
- run: sudo snap install --devmode *.snap
- run: |
cd $HOME
xvfb-run buzz --version
- uses: actions/upload-artifact@v4
with:
name: snap
path: ${{ steps.snapcraft.outputs.snap }}
upload-edge:
runs-on: ubuntu-latest
needs: [ build ]
if: github.ref == 'refs/heads/main'
steps:
- uses: actions/download-artifact@v4
with:
name: snap
- uses: snapcore/action-publish@v1
env:
SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_TOKEN }}
with:
snap: ${{ needs.build.outputs.snap }}
release: edge
upload-stable:
runs-on: ubuntu-latest
needs: [ build ]
if: startsWith(github.ref, 'refs/tags/')
steps:
- uses: actions/download-artifact@v4
with:
name: snap
- uses: snapcore/action-publish@v1
env:
SNAPCRAFT_STORE_CREDENTIALS: ${{ secrets.SNAPCRAFT_TOKEN }}
with:
snap: ${{ needs.build.outputs.snap }}
release: stable

31
.gitignore vendored
View file

@ -5,32 +5,7 @@ build/
.coverage*
!.coveragerc
.env
.DS_Store
htmlcov/
coverage.xml
.idea/
.venv/
venv/
.claude/
# whisper_cpp
whisper_cpp
*.exe
*.dll
*.dylib
*.so
buzz/whisper_cpp/*
# Internationalization - compiled binaries
*.mo
*.po~
benchmarks.json
.eggs
*.egg-info
/coverage/
/wheelhouse/
/.flatpak-builder
/repo
/nemo_msdd_configs
libwhisper.*
whisper.dll
whisper_cpp.py

18
.gitmodules vendored
View file

@ -1,15 +1,7 @@
[submodule "whisper.cpp"]
path = whisper.cpp
url = https://github.com/ggerganov/whisper.cpp
[submodule "whisper_diarization"]
path = whisper_diarization
url = https://github.com/MahmoudAshraf97/whisper-diarization
[submodule "demucs_repo"]
path = demucs_repo
url = https://github.com/MahmoudAshraf97/demucs.git
[submodule "deepmultilingualpunctuation"]
path = deepmultilingualpunctuation
url = https://github.com/oliverguhr/deepmultilingualpunctuation.git
[submodule "ctc_forced_aligner"]
path = ctc_forced_aligner
url = https://github.com/MahmoudAshraf97/ctc-forced-aligner.git
url = https://github.com/chidiwilliams/whisper.cpp
[submodule "stable_ts"]
path = stable_ts
url = https://github.com/chidiwilliams/stable-ts
branch = main

View file

@ -1,16 +0,0 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.1.3
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
- id: ruff-format

View file

@ -2,3 +2,4 @@
disable=
C0114, # missing-module-docstring
C0116, # missing-function-docstring
C0115, # missing-class-docstring

View file

@ -1 +0,0 @@
3.12

View file

@ -1,30 +0,0 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="pytest" type="tests" factoryName="py.test" nameIsGenerated="true">
<module name="buzz" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<option name="SDK_HOME" value="$PROJECT_DIR$/.venv/bin/python" />
<option name="SDK_NAME" value="Poetry (buzz) (2)" />
<option name="WORKING_DIRECTORY" value="" />
<option name="IS_MODULE_SDK" value="false" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="net.ashald.envfile">
<option name="IS_ENABLED" value="false" />
<option name="IS_SUBST" value="false" />
<option name="IS_PATH_MACRO_SUPPORTED" value="false" />
<option name="IS_IGNORE_MISSING_FILES" value="false" />
<option name="IS_ENABLE_EXPERIMENTAL_INTEGRATIONS" value="false" />
<ENTRIES>
<ENTRY IS_ENABLED="true" PARSER="runconfig" IS_EXECUTABLE="false" />
</ENTRIES>
</EXTENSION>
<option name="_new_keywords" value="&quot;&quot;" />
<option name="_new_parameters" value="&quot;&quot;" />
<option name="_new_additionalArguments" value="&quot;-s&quot;" />
<option name="_new_target" value="&quot;&quot;" />
<option name="_new_targetType" value="&quot;CUSTOM&quot;" />
<method v="2" />
</configuration>
</component>

View file

@ -1,8 +1,6 @@
{
"files.associations": {
".coveragerc": "ini",
"Buzz.spec": "python",
"iosfwd": "cpp"
"Buzz.spec": "python"
},
"files.exclude": {
"**/.git": true,

162
Buzz.spec
View file

@ -1,132 +1,40 @@
# -*- mode: python ; coding: utf-8 -*-
import os
import os.path
import platform
import shutil
from PyInstaller.utils.hooks import collect_data_files, copy_metadata
from buzz.__version__ import VERSION
datas = []
datas += collect_data_files("torch")
datas += collect_data_files("demucs")
datas += copy_metadata("tqdm")
datas += copy_metadata("torch")
datas += copy_metadata("regex")
datas += copy_metadata("requests")
datas += copy_metadata("packaging")
datas += copy_metadata("filelock")
datas += copy_metadata("numpy")
datas += copy_metadata("tokenizers")
datas += copy_metadata("huggingface-hub")
datas += copy_metadata("safetensors")
datas += copy_metadata("pyyaml")
datas += copy_metadata("julius")
datas += copy_metadata("openunmix")
datas += copy_metadata("lameenc")
datas += copy_metadata("diffq")
datas += copy_metadata("einops")
datas += copy_metadata("hydra-core")
datas += copy_metadata("hydra-colorlog")
datas += copy_metadata("museval")
datas += copy_metadata("submitit")
datas += copy_metadata("treetable")
datas += copy_metadata("soundfile")
datas += copy_metadata("dora-search")
datas += copy_metadata("lhotse")
datas += collect_data_files('torch')
datas += copy_metadata('tqdm')
datas += copy_metadata('torch')
datas += copy_metadata('regex')
datas += copy_metadata('requests')
datas += copy_metadata('packaging')
datas += copy_metadata('filelock')
datas += copy_metadata('numpy')
datas += copy_metadata('tokenizers')
datas += collect_data_files('whisper')
datas += [('whisper.dll' if platform.system() ==
'Windows' else 'libwhisper.*', '.')]
datas += [('assets/buzz.ico', 'assets')]
datas += [('assets/buzz-icon-1024.png', 'assets')]
datas += [(shutil.which('ffmpeg'), '.')]
# Allow transformers package to load __init__.py file dynamically:
# https://github.com/chidiwilliams/buzz/issues/272
datas += collect_data_files("transformers", include_py_files=True)
datas += collect_data_files("faster_whisper", include_py_files=True)
datas += collect_data_files("stable_whisper", include_py_files=True)
datas += collect_data_files("whisper")
datas += collect_data_files("demucs", include_py_files=True)
datas += collect_data_files("whisper_diarization", include_py_files=True)
datas += collect_data_files("deepmultilingualpunctuation", include_py_files=True)
datas += collect_data_files("ctc_forced_aligner", include_py_files=True, excludes=["build"])
datas += collect_data_files("nemo", include_py_files=True)
datas += collect_data_files("lightning_fabric", include_py_files=True)
datas += collect_data_files("pytorch_lightning", include_py_files=True)
datas += [("buzz/assets/*", "assets")]
datas += [("buzz/locale", "locale")]
datas += [("buzz/schema.sql", ".")]
block_cipher = None
DEBUG = os.environ.get("PYINSTALLER_DEBUG", "").lower() in ["1", "true"]
if DEBUG:
options = [("v", None, "OPTION")]
else:
options = []
def find_dependency(name: str) -> str:
paths = os.environ["PATH"].split(os.pathsep)
candidates = []
for path in paths:
exe_path = os.path.join(path, name)
if os.path.isfile(exe_path):
candidates.append(exe_path)
# Check for chocolatery shims
shim_path = os.path.normpath(os.path.join(path, "..", "lib", "ffmpeg", "tools", "ffmpeg", "bin", name))
if os.path.isfile(shim_path):
candidates.append(shim_path)
if not candidates:
return None
# Pick the largest file
return max(candidates, key=lambda f: os.path.getsize(f))
if platform.system() == "Windows":
binaries = [
(find_dependency("ffmpeg.exe"), "."),
(find_dependency("ffprobe.exe"), "."),
]
else:
binaries = [
(shutil.which("ffmpeg"), "."),
(shutil.which("ffprobe"), "."),
]
binaries.append(("buzz/whisper_cpp/*", "buzz/whisper_cpp"))
if platform.system() == "Windows":
datas += [("dll_backup", "dll_backup")]
datas += collect_data_files("msvc-runtime")
binaries.append(("dll_backup/SDL2.dll", "dll_backup"))
a = Analysis(
["main.py"],
['main.py'],
pathex=[],
binaries=binaries,
binaries=[],
datas=datas,
hiddenimports=[
"dora", "dora.log",
"julius", "julius.core", "julius.resample",
"openunmix", "openunmix.filtering",
"lameenc",
"diffq",
"einops",
"hydra", "hydra.core", "hydra.core.global_hydra",
"hydra_colorlog",
"museval",
"submitit",
"treetable",
"soundfile",
"_soundfile_data",
"lhotse",
],
hiddenimports=[],
hookspath=[],
hooksconfig={},
runtime_hooks=[],
# pyarrow is excluded because its Windows wheel requires AVX2 CPU instructions,
# causing a crash (0xc000001d) on older hardware. Buzz does not use pyarrow directly;
excludes=["pyarrow"],
excludes=[],
win_no_prefer_redirects=False,
win_private_assemblies=False,
cipher=block_cipher,
@ -137,20 +45,20 @@ pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
exe = EXE(
pyz,
a.scripts,
options,
icon="./assets/buzz.ico",
[],
icon='./assets/buzz.ico',
exclude_binaries=True,
name="Buzz",
debug=DEBUG,
name='Buzz',
debug=True,
bootloader_ignore_signals=False,
strip=False,
upx=True,
console=DEBUG,
console=False,
disable_windowed_traceback=False,
argv_emulation=False,
target_arch=None,
codesign_identity=os.environ.get("BUZZ_CODESIGN_IDENTITY"),
entitlements_file="entitlements.plist" if platform.system() == "Darwin" else None,
codesign_identity=None,
entitlements_file=None,
)
coll = COLLECT(
exe,
@ -160,17 +68,17 @@ coll = COLLECT(
strip=False,
upx=False,
upx_exclude=[],
name="Buzz",
name='Buzz',
)
app = BUNDLE(
coll,
name="Buzz.app",
icon="./assets/buzz.icns",
bundle_identifier="com.chidiwilliams.buzz",
version=VERSION,
name='Buzz.app',
icon='./assets/buzz.icns',
bundle_identifier='com.chidiwilliams.buzz',
version='0.5.8',
info_plist={
"NSPrincipalClass": "NSApplication",
"NSHighResolutionCapable": "True",
"NSMicrophoneUsageDescription": "Allow Buzz to record audio from your microphone.",
},
'NSPrincipalClass': 'NSApplication',
'NSHighResolutionCapable': 'True',
'NSMicrophoneUsageDescription': 'Allow Buzz to record audio from your microphone.'
}
)

View file

@ -1 +0,0 @@
- Use uv to run tests and any scripts

View file

@ -1,134 +0,0 @@
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official email address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
williamschidi1@gmail.com.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations

View file

@ -1,123 +0,0 @@
# Buzz Contribution Guide
## Internationalization
To contribute a new language translation to Buzz:
1. Run `make translation_po locale=[locale]`. `[locale]` is a string with the format "language\[_script\]\[_country\]",
where:
- "language" is a lowercase, two-letter ISO 639 language code,
- "script" is a titlecase, four-letter, ISO 15924 script code, and
- "country" is an uppercase, two-letter, ISO 3166 country code.
For example: `make translation_po locale=en_US`.
2. Fill in the translations in the `.po` file generated in `locale/[locale]/LC_MESSAGES`.
3. Run `make translation_mo` to compile the translations, then test your changes.
4. Create a new pull request with your changes.
## Troubleshooting
If you encounter any issues, please open an issue on the Buzz GitHub repository. Here are a few tips to gather data about the issue, so it is easier for us to fix.
**Provide details**
What version of Buzz are you using? On what OS? What are the steps to reproduce the issue? What settings were selected — for example, what model type and size were used?
**Logs**
Log files contain valuable information about what Buzz was doing before the issue occurred. You can get the logs like this:
* Linux — run the app from the terminal and check the output.
* Mac — get logs from `~/Library/Logs/Buzz`.
* Windows — paste this into the Windows Explorer address bar `%USERPROFILE%\AppData\Local\Buzz\Buzz\Logs` and check the logs file.
**Test on latest version**
To see if your issue has already been fixed, try running the latest version of Buzz. To get it, log in to GitHub and go to the [Actions section](https://github.com/chidiwilliams/buzz/actions/workflows/ci.yml?query=branch%3Amain). The latest development versions are attached to the Artifacts section of successful builds.
Linux versions are also pushed to the Snap store. To install the latest development version, use `snap install buzz --channel latest/edge`
## Running Buzz locally
### Linux (Ubuntu)
1. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git`
2. Enter repo folder `cd buzz`
3. Install uv `curl -LsSf https://astral.sh/uv/install.sh | sh` (or see [uv installation docs](https://docs.astral.sh/uv/getting-started/installation/))
4. Install system dependencies you may be missing
```
sudo apt-get install --no-install-recommends libyaml-dev libtbb-dev libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-shape0 libxcb-cursor0 libportaudio2 gettext libpulse0 ffmpeg
```
On versions prior to Ubuntu 24.04 install `sudo apt-get install --no-install-recommends libegl1-mesa`
5. Install the dependencies `uv sync`
6. Run Buzz `uv run buzz`
#### Necessary dependencies for Faster Whisper on GPU
All the dependencies for GPU support should be included in the dependency packages already installed,
but if you get issues running Faster Whisper on GPU, install [CUDA 12](https://developer.nvidia.com/cuda-downloads), [cuBLASS](https://developer.nvidia.com/cublas) and [cuDNN](https://developer.nvidia.com/cudnn).
#### Error for Faster Whisper on GPU `Could not load library libcudnn_ops_infer.so.8`
You need to add path to the library to the `LD_LIBRARY_PATH` environment variable.
Check exact path to your uv virtual environment, it may be different for you.
```
export LD_LIBRARY_PATH=/path/to/buzz/.venv/lib/python3.12/site-packages/nvidia/cudnn/lib/:$LD_LIBRARY_PATH
```
#### For Whisper.cpp you will need to install Vulkan SDK
Follow the instructions for your distribution https://vulkan.lunarg.com/doc/sdk/latest/linux/getting_started.html
### Mac
1. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git`
2. Enter repo folder `cd buzz`
3. Install uv `curl -LsSf https://astral.sh/uv/install.sh | sh` (or `brew install uv`)
4. Install system dependencies you may be missing `brew install ffmpeg`
5. Install the dependencies `uv sync`
6. Run Buzz `uv run buzz`
### Windows
Assumes you have [Git](https://git-scm.com/downloads) and [python](https://www.python.org/downloads) installed and added to PATH.
1. Install the chocolatey package manager for Windows. [More info](https://docs.chocolatey.org/en-us/choco/setup)
```
Set-ExecutionPolicy Bypass -Scope Process -Force; [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1'))
```
2. Install the build tools. `choco install make cmake`
3. Install the ffmpeg. `choco install ffmpeg`
4. Download [Build Tools for Visual Studio 2022](https://visualstudio.microsoft.com/vs/older-downloads/) and install "Desktop development with C++" workload.
5. Add the location of `nmake` to your PATH environment variable. Usually it is `C:\Program Files (x86)\Microsoft Visual Studio\2022\BuildTools\VC\Tools\MSVC\14.44.35207\bin\Hostx64\x86`
6. Install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
7. Clone the repository `git clone --recursive https://github.com/chidiwilliams/buzz.git`
8. Enter repo folder `cd buzz`
9. Install uv `powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"`
10. Install the dependencies `uv sync`
11. Build Whisper.cpp `uv run make buzz/whisper_cpp`
12. `cp -r .\dll_backup\ .\buzz\`
13. Run Buzz `uv run buzz`
Note: It should be safe to ignore any "syntax errors" you see during the build. Buzz will work. Also you can ignore any errors for FFmpeg. Buzz tries to load FFmpeg by several different means and some of them throw errors, but FFmpeg should eventually be found and work.
#### GPU Support
GPU support on Windows with Nvidia GPUs is included out of the box in the `.exe` installer.
To add GPU support for source or `pip` installed version switch torch library to GPU version. For more info see https://pytorch.org/get-started/locally/ .
```
uv add --index https://download.pytorch.org/whl/cu128 torch==2.7.1+cu128 torchaudio==2.7.1+cu128
uv add --index https://pypi.ngc.nvidia.com nvidia-cublas-cu12==12.8.3.14 nvidia-cuda-cupti-cu12==12.8.57 nvidia-cuda-nvrtc-cu12==12.8.61 nvidia-cuda-runtime-cu12==12.8.57 nvidia-cudnn-cu12==9.7.1.26 nvidia-cufft-cu12==11.3.3.41 nvidia-curand-cu12==10.3.9.55 nvidia-cusolver-cu12==11.7.2.55 nvidia-cusparse-cu12==12.5.4.2 nvidia-cusparselt-cu12==0.6.3 nvidia-nvjitlink-cu12==12.8.61 nvidia-nvtx-cu12==12.8.55
```
To use Faster Whisper on GPU, install the following libraries:
* [cuBLAS](https://developer.nvidia.com/cublas)
* [cuDNN](https://developer.nvidia.com/cudnn)

289
Makefile
View file

@ -1,115 +1,116 @@
# Change also in pyproject.toml and buzz/__version__.py
version := 1.4.4
version := $$(poetry version -s)
mac_app_path := ./dist/Buzz.app
mac_zip_path := ./dist/Buzz-${version}-mac.zip
mac_dmg_path := ./dist/Buzz-${version}-mac.dmg
bundle_windows: dist/Buzz
iscc installer.iss
unix_zip_path := Buzz-${version}-unix.tar.gz
bundle_mac: dist/Buzz.app codesign_all_mac zip_mac notarize_zip staple_app_mac dmg_mac
windows_zip_path := Buzz-${version}-windows.tar.gz
bundle_mac_unsigned: dist/Buzz.app zip_mac dmg_mac_unsigned
bundle_linux: dist/Buzz
cd dist && tar -czf ${unix_zip_path} Buzz/ && cd -
dist/Buzz.exe: dist/Buzz
iscc //DAppVersion=${version} installer.iss
dist/Buzz-windows.exe: dist/Buzz
cd dist && tar -czf ${windows_zip_path} Buzz/ && cd -
bundle_mac: dist/Buzz.app
make zip_mac
bundle_mac_local: dist/Buzz.app
make codesign_all_mac
make zip_mac
make notarize_zip
make staple_app_mac
make dmg_mac
UNAME_S := $(shell uname -s)
LIBWHISPER :=
ifeq ($(OS), Windows_NT)
LIBWHISPER=whisper.dll
else
ifeq ($(UNAME_S), Darwin)
LIBWHISPER=libwhisper.dylib
else
LIBWHISPER=libwhisper.so
endif
endif
clean:
ifeq ($(OS), Windows_NT)
-rmdir /s /q buzz\whisper_cpp
-rmdir /s /q whisper.cpp\build
-rmdir /s /q dist
-Remove-Item -Recurse -Force buzz\whisper_cpp
-Remove-Item -Recurse -Force whisper.cpp\build
-Remove-Item -Recurse -Force dist\*
-rm -rf buzz/whisper_cpp
-rm -rf whisper.cpp/build
-rm -rf dist/*
-rm -rf buzz/__pycache__ buzz/**/__pycache__ buzz/**/**/__pycache__ buzz/**/**/**/__pycache__
-for /d /r buzz %%d in (__pycache__) do @if exist "%%d" rmdir /s /q "%%d"
else
rm -rf buzz/whisper_cpp || true
rm -rf whisper.cpp/build || true
rm -f $(LIBWHISPER)
rm -f whisper_cpp.py
rm -rf dist/* || true
find buzz -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
endif
COVERAGE_THRESHOLD := 70
test: whisper_cpp.py
pytest --cov
test: buzz/whisper_cpp
# A check to get updates of yt-dlp. Should run only on local as part of regular development operations
# Sort of a local "update checker"
ifndef CI
uv lock --upgrade-package yt-dlp
endif
pytest -s -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip --cov-fail-under=${COVERAGE_THRESHOLD} --cov-config=.coveragerc
benchmarks: buzz/whisper_cpp
pytest -s -vv --benchmark-only --benchmark-json benchmarks.json
dist/Buzz dist/Buzz.app: buzz/whisper_cpp
dist/Buzz dist/Buzz.app: whisper_cpp.py
pyinstaller --noconfirm Buzz.spec
version:
echo "VERSION = \"${version}\"" > buzz/__version__.py
poetry version ${version}
echo "VERSION = \"${version}\"" > __version__.py
buzz/whisper_cpp: translation_mo
ifeq ($(OS), Windows_NT)
# Build Whisper with Vulkan support.
# The _DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR is needed to prevent mutex lock issues on Windows
# https://github.com/actions/runner-images/issues/10004#issuecomment-2156109231
# -DCMAKE_[C|CXX]_COMPILER_WORKS=TRUE is used to prevent issue in building test program that fails on CI
# GGML_NATIVE=OFF ensures we don't use -march=native (which would target the build machine's CPU)
cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_RPATH='$$ORIGIN' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DCMAKE_C_FLAGS="-D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR" -DCMAKE_CXX_FLAGS="-D_DISABLE_CONSTEXPR_MUTEX_CONSTRUCTOR" -DCMAKE_C_COMPILER_WORKS=TRUE -DCMAKE_CXX_COMPILER_WORKS=TRUE -DGGML_VULKAN=1 -DGGML_NATIVE=OFF
cmake --build whisper.cpp/build -j --config Release --verbose
-mkdir buzz/whisper_cpp
cp whisper.cpp/build/bin/Release/whisper-cli.exe buzz/whisper_cpp/
cp whisper.cpp/build/bin/Release/whisper-server.exe buzz/whisper_cpp/
cp dll_backup/SDL2.dll buzz/whisper_cpp
PowerShell -NoProfile -ExecutionPolicy Bypass -Command "if (-not (Test-Path 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin')) { Start-BitsTransfer -Source https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin -Destination 'buzz\whisper_cpp\ggml-silero-v6.2.0.bin' }"
endif
ifeq ($(shell uname -s), Linux)
# Build Whisper with Vulkan support
# GGML_NATIVE=OFF ensures we don't use -march=native (which would target the build machine's CPU)
# This enables portable SSE4.2/AVX/AVX2 optimizations that work on most x86_64 CPUs
rm -rf whisper.cpp/build || true
-mkdir -p buzz/whisper_cpp
cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_RPATH='$$ORIGIN' -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON -DGGML_VULKAN=1 -DGGML_NATIVE=OFF
cmake --build whisper.cpp/build -j --config Release --verbose
cp whisper.cpp/build/bin/whisper-cli buzz/whisper_cpp/ || true
cp whisper.cpp/build/bin/whisper-server buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/src/libwhisper.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/libggml.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/libggml-base.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/libggml-cpu.so* buzz/whisper_cpp/ || true
cp -P whisper.cpp/build/ggml/src/ggml-vulkan/libggml-vulkan.so* buzz/whisper_cpp/ || true
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin
endif
# Build on Macs
ifeq ($(shell uname -s), Darwin)
-rm -rf whisper.cpp/build || true
-mkdir -p buzz/whisper_cpp
ifeq ($(shell uname -m), arm64)
cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DWHISPER_COREML=1
CMAKE_FLAGS=
ifeq ($(UNAME_S),Darwin)
AVX1_M := $(shell sysctl machdep.cpu.features)
ifeq (,$(findstring AVX1.0,$(AVX1_M)))
CMAKE_FLAGS += -DWHISPER_NO_AVX=ON
endif
AVX2_M := $(shell sysctl machdep.cpu.leaf7_features)
ifeq (,$(findstring AVX2,$(AVX2_M)))
CMAKE_FLAGS += -DWHISPER_NO_AVX2=ON
endif
else
# Intel
cmake -S whisper.cpp -B whisper.cpp/build/ -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DGGML_VULKAN=0 -DGGML_METAL=0
ifeq ($(OS), Windows_NT)
CMAKE_FLAGS += -DBUILD_SHARED_LIBS=ON
endif
endif
cmake --build whisper.cpp/build -j --config Release --verbose
cp whisper.cpp/build/bin/whisper-cli buzz/whisper_cpp/ || true
cp whisper.cpp/build/bin/whisper-server buzz/whisper_cpp/ || true
cp whisper.cpp/build/src/libwhisper.dylib buzz/whisper_cpp/ || true
cp whisper.cpp/build/ggml/src/libggml* buzz/whisper_cpp/ || true
test -f buzz/whisper_cpp/ggml-silero-v6.2.0.bin || curl -L -o buzz/whisper_cpp/ggml-silero-v6.2.0.bin https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v6.2.0.bin
endif
$(LIBWHISPER):
cmake -S whisper.cpp -B whisper.cpp/build/ $(CMAKE_FLAGS)
cmake --build whisper.cpp/build --verbose
cp whisper.cpp/build/$(LIBWHISPER) . || true
cp whisper.cpp/build/bin/Debug/$(LIBWHISPER) . || true
whisper_cpp.py: $(LIBWHISPER)
ctypesgen ./whisper.cpp/whisper.h -l$(LIBWHISPER) -o whisper_cpp.py
staple_app_mac:
xcrun stapler staple ${mac_app_path}
codesign_all_mac:
make codesign_mac path="./dist/Buzz.app"
make codesign_mac path="./dist/Buzz.app/Contents/MacOS/Buzz"
for i in $$(find dist/Buzz.app/Contents/Resources -name "*.dylib" -o -name "*.so" -type f); \
do \
make codesign_mac path="$$i"; \
done
for i in $$(find dist/Buzz.app/Contents/Resources/torch/bin -name "*" -type f); \
do \
make codesign_mac path="$$i"; \
done
make codesign_mac path="./dist/Buzz.app/Contents/Resources/ffmpeg"
make codesign_mac path="./dist/Buzz.app/Contents/MacOS/Buzz"
make codesign_verify
codesign_mac:
codesign --deep --force --options=runtime --entitlements ./entitlements.plist --sign "$$BUZZ_CODESIGN_IDENTITY" --timestamp ${path}
zip_mac:
ditto -c -k --keepParent "${mac_app_path}" "${mac_zip_path}"
# Prints all the Mac developer identities used for code signing
print_identities_mac:
security find-identity -p basic -v
notarize_zip:
xcrun notarytool submit ${mac_zip_path} --keychain-profile "$$BUZZ_KEYCHAIN_NOTARY_PROFILE" --wait
dmg_mac:
ditto -x -k "${mac_zip_path}" dist/dmg
create-dmg \
@ -118,125 +119,19 @@ dmg_mac:
--window-pos 200 120 \
--window-size 600 300 \
--icon-size 100 \
--icon "Buzz.app" 175 120 \
--icon "./assets/buzz.icns" 175 120 \
--hide-extension "Buzz.app" \
--app-drop-link 425 120 \
--codesign "$$BUZZ_CODESIGN_IDENTITY" \
--notarize "$$BUZZ_KEYCHAIN_NOTARY_PROFILE" \
--filesystem APFS \
"${mac_dmg_path}" \
"dist/dmg/"
dmg_mac_unsigned:
ditto -x -k "${mac_zip_path}" dist/dmg
create-dmg \
--volname "Buzz" \
--volicon "./assets/buzz.icns" \
--window-pos 200 120 \
--window-size 600 300 \
--icon-size 100 \
--icon "Buzz.app" 175 120 \
--hide-extension "Buzz.app" \
--app-drop-link 425 120 \
"${mac_dmg_path}" \
"dist/dmg/"
staple_app_mac:
xcrun stapler staple ${mac_app_path}
notarize_zip:
xcrun notarytool submit ${mac_zip_path} --keychain-profile "$$BUZZ_KEYCHAIN_NOTARY_PROFILE" --wait
zip_mac:
ditto -c -k --keepParent "${mac_app_path}" "${mac_zip_path}"
codesign_all_mac: dist/Buzz.app
for i in $$(find dist/Buzz.app/Contents/Resources/torch/bin -name "*" -type f); \
do \
codesign --force --options=runtime --sign "$$BUZZ_CODESIGN_IDENTITY" --timestamp "$$i"; \
done
for i in $$(find dist/Buzz.app/Contents/Resources -name "*.dylib" -o -name "*.so" -type f); \
do \
codesign --force --options=runtime --sign "$$BUZZ_CODESIGN_IDENTITY" --timestamp "$$i"; \
done
for i in $$(find dist/Buzz.app/Contents/MacOS -name "*.dylib" -o -name "*.so" -o -name "Qt*" -o -name "Python" -type f); \
do \
codesign --force --options=runtime --sign "$$BUZZ_CODESIGN_IDENTITY" --timestamp "$$i"; \
done
codesign --force --options=runtime --sign "$$BUZZ_CODESIGN_IDENTITY" --timestamp dist/Buzz.app/Contents/MacOS/Buzz
codesign --force --options=runtime --sign "$$BUZZ_CODESIGN_IDENTITY" --entitlements ./entitlements.plist --timestamp dist/Buzz.app
codesign --verify --deep --strict --verbose=2 dist/Buzz.app
# HELPERS
# Get the build logs for a notary upload
notarize_log:
xcrun notarytool log ${id} --keychain-profile "$$BUZZ_KEYCHAIN_NOTARY_PROFILE"
# Make GGML model from whisper. Example: make ggml model_path=/Users/chidiwilliams/.cache/whisper/medium.pt
ggml:
python3 ./whisper.cpp/models/convert-pt-to-ggml.py ${model_path} .venv/lib/python3.12/site-packages/whisper dist
upload_brew:
brew bump-cask-pr --version ${version} --verbose buzz
UPGRADE_VERSION_BRANCH := upgrade-to-${version}
gh_upgrade_pr:
git checkout main && git pull
git checkout -B ${UPGRADE_VERSION_BRANCH}
make version version=${version}
git commit -am "Upgrade to ${version}"
git push --set-upstream origin ${UPGRADE_VERSION_BRANCH}
gh pr create --fill
gh pr merge ${UPGRADE_VERSION_BRANCH} --auto --squash
# Internationalization
translation_po_all:
$(MAKE) translation_po locale=ca_ES
$(MAKE) translation_po locale=da_DK
$(MAKE) translation_po locale=de_DE
$(MAKE) translation_po locale=en_US
$(MAKE) translation_po locale=es_ES
$(MAKE) translation_po locale=it_IT
$(MAKE) translation_po locale=ja_JP
$(MAKE) translation_po locale=lv_LV
$(MAKE) translation_po locale=nl
$(MAKE) translation_po locale=pl_PL
$(MAKE) translation_po locale=pt_BR
$(MAKE) translation_po locale=uk_UA
$(MAKE) translation_po locale=zh_CN
$(MAKE) translation_po locale=zh_TW
TMP_POT_FILE_PATH := $(shell mktemp)
PO_FILE_PATH := buzz/locale/${locale}/LC_MESSAGES/buzz.po
translation_po:
mkdir -p buzz/locale/${locale}/LC_MESSAGES
xgettext --from-code=UTF-8 --add-location=file -o "${TMP_POT_FILE_PATH}" -l python $(shell find buzz -name '*.py')
sed -i.bak 's/CHARSET/UTF-8/' ${TMP_POT_FILE_PATH}
if [ ! -f ${PO_FILE_PATH} ]; then \
msginit --no-translator --input=${TMP_POT_FILE_PATH} --output-file=${PO_FILE_PATH}; \
fi
rm ${TMP_POT_FILE_PATH}.bak
msgmerge -U ${PO_FILE_PATH} ${TMP_POT_FILE_PATH}
# On windows we can have two ways to compile locales, one for CI the other for local builds
# Will try both and ignore errors if they fail
translation_mo:
ifeq ($(OS), Windows_NT)
-forfiles /p buzz\locale /c "cmd /c python ..\..\msgfmt.py -o @path\LC_MESSAGES\buzz.mo @path\LC_MESSAGES\buzz.po"
-for dir in buzz/locale/*/ ; do \
python msgfmt.py -o $$dir/LC_MESSAGES/buzz.mo $$dir/LC_MESSAGES/buzz.po; \
done
else
for dir in buzz/locale/*/ ; do \
python3 msgfmt.py -o $$dir/LC_MESSAGES/buzz.mo $$dir/LC_MESSAGES/buzz.po; \
done
endif
lint:
ruff check . --fix
ruff format .
codesign_verify:
codesign --verify --deep --strict --verbose=2 dist/Buzz.app

View file

@ -1,98 +0,0 @@
# Buzz
[ドキュメント](https://chidiwilliams.github.io/buzz/)
パソコン上でオフラインで音声の文字起こしと翻訳を行います。OpenAIの[Whisper](https://github.com/openai/whisper)を使用しています。
![MIT License](https://img.shields.io/badge/license-MIT-green)
[![CI](https://github.com/chidiwilliams/buzz/actions/workflows/ci.yml/badge.svg)](https://github.com/chidiwilliams/buzz/actions/workflows/ci.yml)
[![codecov](https://codecov.io/github/chidiwilliams/buzz/branch/main/graph/badge.svg?token=YJSB8S2VEP)](https://codecov.io/github/chidiwilliams/buzz)
![GitHub release (latest by date)](https://img.shields.io/github/v/release/chidiwilliams/buzz)
[![Github all releases](https://img.shields.io/github/downloads/chidiwilliams/buzz/total.svg)](https://GitHub.com/chidiwilliams/buzz/releases/)
![Buzz](./buzz/assets/buzz-banner.jpg)
## 機能
- 音声・動画ファイルまたはYouTubeリンクの文字起こし
- マイクからのリアルタイム音声文字起こし
- イベントやプレゼンテーション中に便利なプレゼンテーションウィンドウ
- ノイズの多い音声でより高い精度を得るための、文字起こし前の話者分離
- 文字起こしメディアでの話者識別
- 複数のWhisperバックエンドをサポート
- Nvidia GPU向けCUDAアクセラレーション対応
- Mac向けApple Silicon対応
- Whisper.cppでのVulkanアクセラレーション対応統合GPUを含むほとんどのGPUで利用可能
- TXT、SRT、VTT形式での文字起こしエクスポート
- 検索、再生コントロール、速度調整機能を備えた高度な文字起こしビューア
- 効率的なナビゲーションのためのキーボードショートカット
- 新しいファイルの自動文字起こしのための監視フォルダ
- スクリプトや自動化のためのコマンドラインインターフェース
## インストール
### macOS
[SourceForge](https://sourceforge.net/projects/buzz-captions/files/)から`.dmg`ファイルをダウンロードしてください。
### Windows
[SourceForge](https://sourceforge.net/projects/buzz-captions/files/)からインストールファイルを入手してください。
アプリは署名されていないため、インストール時に警告が表示されます。`詳細情報` -> `実行`を選択してください。
### Linux
Buzzは[Flatpak](https://flathub.org/apps/io.github.chidiwilliams.Buzz)または[Snap](https://snapcraft.io/buzz)として利用可能です。
Flatpakをインストールするには、以下を実行してください
```shell
flatpak install flathub io.github.chidiwilliams.Buzz
```
[![Download on Flathub](https://flathub.org/api/badge?svg&locale=en)](https://flathub.org/en/apps/io.github.chidiwilliams.Buzz)
Snapをインストールするには、以下を実行してください
```shell
sudo apt-get install libportaudio2 libcanberra-gtk-module libcanberra-gtk3-module
sudo snap install buzz
```
[![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/buzz)
### PyPI
[ffmpeg](https://www.ffmpeg.org/download.html)をインストールしてください。
Python 3.12環境を使用していることを確認してください。
Buzzをインストール
```shell
pip install buzz-captions
python -m buzz
```
**PyPIでのGPUサポート**
PyPIでインストールしたバージョンでWindows上のNvidia GPUのGPUサポートを有効にするには、[torch](https://pytorch.org/get-started/locally/)のCUDAサポートを確認してください。
```
pip3 install -U torch==2.8.0+cu129 torchaudio==2.8.0+cu129 --index-url https://download.pytorch.org/whl/cu129
pip3 install nvidia-cublas-cu12==12.9.1.4 nvidia-cuda-cupti-cu12==12.9.79 nvidia-cuda-runtime-cu12==12.9.79 --extra-index-url https://pypi.ngc.nvidia.com
```
### 最新開発版
最新の機能やバグ修正を含む最新開発版の入手方法については、[FAQ](https://chidiwilliams.github.io/buzz/docs/faq#9-where-can-i-get-latest-development-version)をご覧ください。
### スクリーンショット
<div style="display: flex; flex-wrap: wrap;">
<img alt="ファイルインポート" src="share/screenshots/buzz-1-import.png" style="max-width: 18%; margin-right: 1%;" />
<img alt="メイン画面" src="share/screenshots/buzz-2-main_screen.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="設定" src="share/screenshots/buzz-3-preferences.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="モデル設定" src="share/screenshots/buzz-3.2-model-preferences.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="文字起こし" src="share/screenshots/buzz-4-transcript.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="ライブ録音" src="share/screenshots/buzz-5-live_recording.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="リサイズ" src="share/screenshots/buzz-6-resize.png" style="max-width: 18%;" />
</div>

171
README.md
View file

@ -1,106 +1,141 @@
[[简体中文](readme/README.zh_CN.md)] <- 点击查看中文页面。
# Buzz
[Documentation](https://chidiwilliams.github.io/buzz/)
Transcribe and translate audio offline on your personal computer. Powered by
OpenAI's [Whisper](https://github.com/openai/whisper).
Transcribe and translate audio offline on your personal computer. Powered by OpenAI's [Whisper](https://github.com/openai/whisper).
![MIT License](https://img.shields.io/badge/license-MIT-green)
[![CI](https://github.com/chidiwilliams/buzz/actions/workflows/ci.yml/badge.svg)](https://github.com/chidiwilliams/buzz/actions/workflows/ci.yml)
[![codecov](https://codecov.io/github/chidiwilliams/buzz/branch/main/graph/badge.svg?token=YJSB8S2VEP)](https://codecov.io/github/chidiwilliams/buzz)
![GitHub release (latest by date)](https://img.shields.io/github/v/release/chidiwilliams/buzz)
[![Github all releases](https://img.shields.io/github/downloads/chidiwilliams/buzz/total.svg)](https://GitHub.com/chidiwilliams/buzz/releases/)
![Buzz](https://raw.githubusercontent.com/chidiwilliams/buzz/refs/heads/main/buzz/assets/buzz-banner.jpg)
![Buzz](./assets/buzz-banner.jpg)
## Features
- Transcribe audio and video files or Youtube links
- Live realtime audio transcription from microphone
- Presentation window for easy accessibility during events and presentations
- Speech separation before transcription for better accuracy on noisy audio
- Speaker identification in transcribed media
- Multiple whisper backend support
- CUDA acceleration support for Nvidia GPUs
- Apple Silicon support for Macs
- Vulkan acceleration support for Whisper.cpp on most GPUs, including integrated GPUs
- Export transcripts to TXT, SRT, and VTT
- Advanced Transcription Viewer with search, playback controls, and speed adjustment
- Keyboard shortcuts for efficient navigation
- Watch folder for automatic transcription of new files
- Command-Line Interface for scripting and automation
- Real-time transcription and translation from your computer's microphones to text ([Demo](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7))
- Import audio and video files and export transcripts to TXT, SRT, and VTT ([Demo](https://www.loom.com/share/cf263b099ac3481082bb56d19b7c87fe))
## Installation
### macOS
To install Buzz, download the [latest version](https://github.com/chidiwilliams/buzz/releases/latest) for your operating system. Buzz is available on **Mac** and **Windows**.
Download the `.dmg` from the [SourceForge](https://sourceforge.net/projects/buzz-captions/files/).
### Mac (macOS 11.7 and above)
- Download and open the `Buzz-x.y.z-dmg` file.
- After the installation window opens, drag the Buzz icon into the folder to add Buzz to your Applications directory.
### Windows
Get the installation files from the [SourceForge](https://sourceforge.net/projects/buzz-captions/files/).
- Download and run the `Buzz-x.y.z.exe` file.
App is not signed, you will get a warning when you install it. Select `More info` -> `Run anyway`.
## How to use
### Linux
### Live Recording
Buzz is available as a [Flatpak](https://flathub.org/apps/io.github.chidiwilliams.Buzz) or a [Snap](https://snapcraft.io/buzz).
To start a live recording:
To install flatpak, run:
```shell
flatpak install flathub io.github.chidiwilliams.Buzz
```
- Select a recording task, language, quality, and microphone.
- Click Record.
[![Download on Flathub](https://flathub.org/api/badge?svg&locale=en)](https://flathub.org/en/apps/io.github.chidiwilliams.Buzz)
| Field | Options | Default | Description |
| ---------- | ---------------------------------------------------------------------------------------------------------------------------------------- | --------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| Task | "Transcribe", "Translate" | "Transcribe" | "Transcribe" converts the input audio into text in the selected language, while "Translate" converts it into text in English. |
| Language | See [Whisper's documentation](https://github.com/openai/whisper#available-models-and-languages) for the full list of supported languages | "Detect Language" | "Detect Language" will try to detect the spoken language in the audio based on the first few seconds. However, selecting a language is recommended (if known) as it will improve transcription quality in many cases. |
| Quality | "Very Low", "Low", "Medium", "High" | "Low" | The transcription quality determines the Whisper model used for transcription. "Very Low" uses the "tiny" model; "Low" uses the "base" model; "Medium" uses the "small" model; and "High" uses the "medium" model. The larger models produce higher-quality transcriptions, but require more system resources. See [Whisper's documentation](https://github.com/openai/whisper#available-models-and-languages) for more information about the models. |
| Microphone | [Available system microphones] | [Default system microphone] | Microphone for recording input audio. |
To install snap, run:
```shell
sudo apt-get install libportaudio2 libcanberra-gtk-module libcanberra-gtk3-module
sudo snap install buzz
```
[![Live Recording on Buzz](https://cdn.loom.com/sessions/thumbnails/564b753eb4d44b55b985b8abd26b55f7-with-play.gif)](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7 'Live Recording on Buzz')
[![Get it from the Snap Store](https://snapcraft.io/static/images/badges/en/snap-store-black.svg)](https://snapcraft.io/buzz)
### Record audio playing from computer
### PyPI
To record audio playing from an application on your computer, you may install an audio loopback driver (a program that lets you create virtual audio devices). The rest of this guide will use [BlackHole](https://github.com/ExistentialAudio/BlackHole) on Mac, but you can use other alternatives for your operating system (see [LoopBeAudio](https://nerds.de/en/loopbeaudio.html), [LoopBack](https://rogueamoeba.com/loopback/), and [Virtual Audio Cable](https://vac.muzychenko.net/en/)).
Install [ffmpeg](https://www.ffmpeg.org/download.html)
1. Install [BlackHole via Homebrew](https://github.com/ExistentialAudio/BlackHole#option-2-install-via-homebrew)
Ensure you use Python 3.12 environment.
```shell
brew install blackhole-2ch
```
Install Buzz
2. Open Audio MIDI Setup from Spotlight or from `/Applications/Utilities/Audio Midi Setup.app`.
![Open Audio MIDI Setup from Spotlight](https://existential.audio/howto/img/spotlight.png)
3. Click the '+' icon at the lower left corner and select 'Create Multi-Output Device'.
![Create multi-output device](https://existential.audio/howto/img/createmulti-output.png)
4. Add your default speaker and BlackHole to the multi-output device.
![Screenshot of multi-output device](https://existential.audio/howto/img/multi-output.png)
5. Select this multi-output device as your speaker (application or system-wide) to play audio into BlackHole.
6. Open Buzz, select BlackHole as your microphone, and record as before to see transcriptions from the audio playing through BlackHole.
## File import
To import a file:
- Click Import on the File menu (or **Command + O** on Mac, **Ctrl + O** on Windows).
- Choose an audio or video file. Supported formats: "mp3", "wav", "m4a", "ogg", "mp4", "webm", "ogm".
- Select a task, language, quality, and export format.
- Click Run.
| Field | Options | Default |
| --------- | ------------------- | ------- |
| Export As | "TXT", "SRT", "VTT" | "TXT" |
(See the [Live Recording section](#live-recording) for more information about the task, language, and quality settings.)
[![Media File Import on Buzz](https://cdn.loom.com/sessions/thumbnails/cf263b099ac3481082bb56d19b7c87fe-with-play.gif)](https://www.loom.com/share/cf263b099ac3481082bb56d19b7c87fe 'Media File Import on Buzz')
## Settings
### Enable GGML inference
_(Default: off)_
Turn this on to use inference from [Whisper.cpp](https://github.com/ggerganov/whisper.cpp). Whisper.cpp runs faster than Whisper's original Python implementation but requires a different set of models for inference. The setting is also not available on Windows and with the "Detect Language" option; it should fall back to the original Whisper inference. See the [Whisper.cpp documentation](https://github.com/ggerganov/whisper.cpp) for more information.
| Model | Link | SHA256 |
| ----- | ------------------------------------------------------------------ | ---------------------------------------------------------------- |
| tiny | <https://ggml.buzz.chidiwilliams.com/ggml-model-whisper-tiny.bin> | be07e048e1e599ad46341c8d2a135645097a538221678b7acdd1b1919c6e1b21 |
| base | <https://ggml.buzz.chidiwilliams.com/ggml-model-whisper-base.bin> | 60ed5bc3dd14eea856493d334349b405782ddcaf0028d4b5df4088345fba2efe |
| small | <https://ggml.buzz.chidiwilliams.com/ggml-model-whisper-small.bin> | 1be3a9b2063867b937e64e2ec7483364a79917e157fa98c5d94b5c1fffea987b |
## Build/run locally
To build/run Buzz locally from source, first install the dependencies:
1. Install [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer).
2. Install the project dependencies.
```shell
poetry install
```
3. (Optional) To use Whisper.cpp inference, run:
```shell
make libwhisper.so
```
Then, to run:
```shell
pip install buzz-captions
python -m buzz
poetry run python main.py
```
**GPU support for PyPI**
To build:
To get GPU support for Nvidia GPUs on Windows with the PyPI-installed version, ensure CUDA support for [torch](https://pytorch.org/get-started/locally/)
```
pip3 install -U torch==2.8.0+cu129 torchaudio==2.8.0+cu129 --index-url https://download.pytorch.org/whl/cu129
pip3 install nvidia-cublas-cu12==12.9.1.4 nvidia-cuda-cupti-cu12==12.9.79 nvidia-cuda-runtime-cu12==12.9.79 --extra-index-url https://pypi.ngc.nvidia.com
```shell
poetry run pyinstaller --noconfirm Buzz.spec
```
### Latest development version
## FAQ
For info on how to get latest development version with latest features and bug fixes see [FAQ](https://chidiwilliams.github.io/buzz/docs/faq#9-where-can-i-get-latest-development-version).
1. **Where are the models stored?**
### Support Buzz
The Whisper models are stored in `~/.cache/whisper`. The Whisper.cpp models are stored in `~/Library/Caches/Buzz` (Mac OS), `~/.cache/Buzz` (Unix), `C:\Users/<username>\AppData\Local\Buzz\Buzz\Cache` (Windows).
You can help Buzz by starring 🌟 the repo and sharing it with your friends.
### Screenshots
<div style="display: flex; flex-wrap: wrap;">
<img alt="File import" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-1-import.png" style="max-width: 18%; margin-right: 1%;" />
<img alt="Main screen" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-2-main_screen.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="Preferences" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-3-preferences.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="Model preferences" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-3.2-model-preferences.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="Transcript" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-4-transcript.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="Live recording" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-5-live_recording.png" style="max-width: 18%; margin-right: 1%; height:auto;" />
<img alt="Resize" src="https://github.com/chidiwilliams/buzz/raw/main/share/screenshots/buzz-6-resize.png" style="max-width: 18%;" />
</div>
2. **What can I try if the transcription runs too slowly?**
Try using a lower quality or turning on [GGML inference](#enable-ggml-inference).

1
__version__.py Normal file
View file

@ -0,0 +1 @@
# Application version string.
VERSION = "0.5.8"

View file

Before

Width:  |  Height:  |  Size: 895 KiB

After

Width:  |  Height:  |  Size: 895 KiB

Before After
Before After

BIN
assets/buzz-icon-1024.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 217 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 279 KiB

After

Width:  |  Height:  |  Size: 279 KiB

Before After
Before After

View file

Before

Width:  |  Height:  |  Size: 122 KiB

After

Width:  |  Height:  |  Size: 122 KiB

Before After
Before After

View file

@ -1,17 +0,0 @@
# Launcher definition for Buzz (freedesktop.org Desktop Entry format).
[Desktop Entry]
Type=Application
# NOTE(review): the Encoding key is deprecated in the Desktop Entry spec
# (UTF-8 is assumed); harmless to keep for older environments.
Encoding=UTF-8
Name=Buzz
Comment=Buzz transcribes and translates audio offline on your personal computer.
# The Linux package installs the application under /opt/buzz.
Path=/opt/buzz
Exec=/opt/buzz/Buzz
Icon=buzz
# GUI application: do not spawn a terminal window.
Terminal=false

View file

View file

@ -1,4 +0,0 @@
"""Executable entry point: running this module starts the Buzz application."""

from buzz import buzz

if __name__ == "__main__":
    buzz.main()

View file

@ -1 +0,0 @@
# Application version string.
VERSION = "1.4.4"

View file

@ -1,23 +0,0 @@
import typing
from PyQt6.QtGui import QAction, QKeySequence
class Action(QAction):
    """QAction variant whose tooltip always advertises the current shortcut.

    Overriding setShortcut keeps the tooltip text in sync whenever the
    shortcut is (re)assigned.
    """

    def setShortcut(
        self,
        shortcut: typing.Union["QKeySequence", "QKeySequence.StandardKey", str, int],
    ) -> None:
        super().setShortcut(shortcut)
        self.setToolTip(Action.get_tooltip(self))

    @classmethod
    def get_tooltip(cls, action: QAction):
        """Return the action's tooltip, decorated with its shortcut when set."""
        text = action.toolTip()
        keys = action.shortcut()
        if keys.isEmpty():
            return text
        native = keys.toString(QKeySequence.SequenceFormat.NativeText)
        return (
            "<p style='white-space:pre'>"
            f"{text}&nbsp;&nbsp;<code style='font-size:small'>{native}</code></p>"
        )

View file

@ -1,12 +0,0 @@
import os
import sys
# Base directory for bundled resources. When frozen (PyInstaller), files are
# extracted under sys._MEIPASS; when running from source, they live next to
# this module.
if getattr(sys, "frozen", False):
    APP_BASE_DIR = getattr(sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)))
else:
    APP_BASE_DIR = os.path.dirname(__file__)


def get_path(path: str):
    """Return the absolute path of a resource bundled with the application."""
    return os.path.join(APP_BASE_DIR, path)

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" width="48"><path d="M21.65 38.85v-12.5H9.15v-4.7h12.5V9.15h4.7v12.5h12.5v4.7h-12.5v12.5Z"/></svg>

Before

Width:  |  Height:  |  Size: 150 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 310 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 279 KiB

View file

@ -1,23 +0,0 @@
<svg width="1024" height="1024" viewBox="0 0 1024 1024" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_945_8)">
<rect width="1024" height="1024" fill="#CD0000"/>
<rect width="1024" height="1024" fill="url(#paint0_linear_945_8)"/>
<path d="M871 512C871 710.27 710.27 871 512 871C313.73 871 153 710.27 153 512C153 313.73 313.73 153 512 153C710.27 153 871 313.73 871 512Z"
stroke="white" stroke-width="72"/>
<circle cx="512.5" cy="512.5" r="237.5" fill="url(#paint1_radial_945_8)"/>
</g>
<defs>
<linearGradient id="paint0_linear_945_8" x1="512" y1="0" x2="512" y2="1024" gradientUnits="userSpaceOnUse">
<stop stop-color="#CD0000" stop-opacity="0"/>
<stop offset="1" stop-opacity="0.2"/>
</linearGradient>
<radialGradient id="paint1_radial_945_8" cx="0" cy="0" r="1" gradientUnits="userSpaceOnUse"
gradientTransform="translate(446 454.906) rotate(45) scale(320.761 365.853)">
<stop offset="0.578998" stop-color="white"/>
<stop offset="0.873177" stop-color="#E6E6E6"/>
</radialGradient>
<clipPath id="clip0_945_8">
<rect width="1024" height="1024" rx="185" fill="white"/>
</clipPath>
</defs>
</svg>

Before

Width:  |  Height:  |  Size: 1.3 KiB

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" width="48"><path d="m16.9 33.6 7.1-7.1 7.1 7.1 2.5-2.5-7.1-7.1 7.1-7.1-2.5-2.5-7.1 7.1-7.1-7.1-2.5 2.5 7.1 7.1-7.1 7.1ZM24 45.25q-4.4 0-8.275-1.65T8.95 39.05q-2.9-2.9-4.55-6.775Q2.75 28.4 2.75 24q0-4.45 1.65-8.325 1.65-3.875 4.55-6.75t6.775-4.55Q19.6 2.7 24 2.7q4.45 0 8.325 1.675 3.875 1.675 6.75 4.55t4.55 6.75Q45.3 19.55 45.3 24q0 4.4-1.675 8.275t-4.55 6.775q-2.875 2.9-6.75 4.55T24 45.25Zm0-4.7q6.9 0 11.725-4.825Q40.55 30.9 40.55 24q0-6.9-4.825-11.725Q30.9 7.45 24 7.45q-6.9 0-11.725 4.825Q7.45 17.1 7.45 24q0 6.9 4.825 11.725Q17.1 40.55 24 40.55ZM24 24Z"/></svg>

Before

Width:  |  Height:  |  Size: 621 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" width="48"><path d="M12.65 43.25q-2 0-3.375-1.35T7.9 38.55V10.9H5V6.2h11.55V3.8H31.4v2.4H43v4.7h-2.9v27.65q0 1.95-1.4 3.325-1.4 1.375-3.35 1.375Zm22.7-32.35h-22.7v27.65h22.7ZM17.7 34.65h3.85V14.7H17.7Zm8.75 0h3.9V14.7h-3.9ZM12.65 10.9v27.65Z"/></svg>

Before

Width:  |  Height:  |  Size: 303 B

View file

@ -1,4 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 0 24 24" width="24px" fill="#000000">
<path d="M0 0h24v24H0z" fill="none"/>
<path d="M19 9h-4V3H9v6H5l7 7 7-7zM5 18v2h14v-2H5z"/>
</svg>

Before

Width:  |  Height:  |  Size: 210 B

View file

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M6.75 1C6.33579 1 6 1.33579 6 1.75V3.50559C5.96824 3.53358 5.93715 3.56276 5.9068 3.59311L1.66416 7.83575C0.883107 8.6168 0.883107 9.88313 1.66416 10.6642L5.19969 14.1997C5.98074 14.9808 7.24707 14.9808 8.02812 14.1997L12.2708 9.95707C13.0518 9.17602 13.0518 7.90969 12.2708 7.12864L8.73522 3.59311C8.39027 3.24816 7.95066 3.05555 7.5 3.0153V1.75C7.5 1.33579 7.16421 1 6.75 1ZM6 5.62123V6.25C6 6.66421 6.33579 7 6.75 7C7.16421 7 7.5 6.66421 7.5 6.25V4.54033C7.56363 4.56467 7.62328 4.60249 7.67456 4.65377L11.2101 8.1893C11.2995 8.27875 11.348 8.39366 11.3555 8.51071H3.11052L6 5.62123ZM6.26035 13.1391L3.132 10.0107H10.0958L6.96746 13.1391C6.77219 13.3343 6.45561 13.3343 6.26035 13.1391Z" fill="#212121"/>
<path d="M2 17.5V12.4143L3.5 13.9143V17.5C3.5 18.0523 3.94772 18.5 4.5 18.5H19.5C20.0523 18.5 20.5 18.0523 20.5 17.5V6.5C20.5 5.94771 20.0523 5.5 19.5 5.5H12.0563L10.5563 4H19.5C20.8807 4 22 5.11929 22 6.5V17.5C22 18.8807 20.8807 20 19.5 20H4.5C3.11929 20 2 18.8807 2 17.5Z" fill="#212121"/>
<path d="M11 14.375C11 13.8816 11.1541 13.4027 11.3418 12.9938C11.5325 12.5784 11.7798 12.1881 12.0158 11.8595C12.2531 11.5289 12.4888 11.247 12.6647 11.0481C12.7502 10.9515 12.9062 10.7867 12.9642 10.7254L12.9697 10.7197C13.2626 10.4268 13.7374 10.4268 14.0303 10.7197L14.3353 11.0481C14.5112 11.247 14.7469 11.5289 14.9842 11.8595C15.2202 12.1881 15.4675 12.5784 15.6582 12.9938C15.8459 13.4027 16 13.8816 16 14.375C16 15.7654 14.9711 17 13.5 17C12.0289 17 11 15.7654 11 14.375ZM13.7658 12.7343C13.676 12.6092 13.5858 12.4916 13.5 12.3844C13.4142 12.4916 13.324 12.6092 13.2342 12.7343C13.0327 13.015 12.8425 13.32 12.7051 13.6195C12.5647 13.9253 12.5 14.1808 12.5 14.375C12.5 15.0663 12.9809 15.5 13.5 15.5C14.0191 15.5 14.5 15.0663 14.5 14.375C14.5 14.1808 14.4353 13.9253 14.2949 13.6195C14.1575 13.32 13.9673 13.015 13.7658 12.7343Z" fill="#212121"/>
</svg>

Before

Width:  |  Height:  |  Size: 2 KiB

View file

@ -1,5 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M21.7092 2.29502C21.8041 2.3904 21.8757 2.50014 21.9241 2.61722C21.9727 2.73425 21.9996 2.8625 22 2.997L22 3V9C22 9.55228 21.5523 10 21 10C20.4477 10 20 9.55228 20 9V5.41421L14.7071 10.7071C14.3166 11.0976 13.6834 11.0976 13.2929 10.7071C12.9024 10.3166 12.9024 9.68342 13.2929 9.29289L18.5858 4H15C14.4477 4 14 3.55228 14 3C14 2.44772 14.4477 2 15 2H20.9998C21.2749 2 21.5242 2.11106 21.705 2.29078L21.7092 2.29502Z" fill="#000000"/>
<path d="M10.7071 14.7071L5.41421 20H9C9.55228 20 10 20.4477 10 21C10 21.5523 9.55228 22 9 22H3.00069L2.997 22C2.74301 21.9992 2.48924 21.9023 2.29502 21.7092L2.29078 21.705C2.19595 21.6096 2.12432 21.4999 2.07588 21.3828C2.02699 21.2649 2 21.1356 2 21V15C2 14.4477 2.44772 14 3 14C3.55228 14 4 14.4477 4 15V18.5858L9.29289 13.2929C9.68342 12.9024 10.3166 12.9024 10.7071 13.2929C11.0976 13.6834 11.0976 14.3166 10.7071 14.7071Z" fill="#000000"/>
</svg>

Before

Width:  |  Height:  |  Size: 1.1 KiB

View file

@ -1,2 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg fill="#000000" width="800px" height="800px" viewBox="0 0 14 14" role="img" focusable="false" aria-hidden="true" xmlns="http://www.w3.org/2000/svg"><path d="M 7.5291661,11.795909 C 7.4168129,11.419456 7.3406864,10.225625 7.3406864,9.29222 c 0,-0.11438 -0.029767,-0.221667 -0.081573,-0.314893 0.051933,-0.115773 0.08132,-0.24358 0.08132,-0.378226 l 0,-1.709364 c 0,-0.511733 -0.416226,-0.927959 -0.9279585,-0.927959 l -0.8772919,0 C 5.527203,5.856265 5.52163,5.751005 5.518336,5.648406 5.514666,5.556066 5.513396,5.470313 5.513016,5.385826 5.511876,5.296776 5.5132694,5.224073 5.517196,5.160866 5.524666,5.024193 5.541009,4.891827 5.565076,4.773647 5.591043,4.646981 5.619669,4.564774 5.630689,4.535134 c 0.0019,-0.0052 0.0038,-0.01013 0.00557,-0.01533 0.00709,-0.02039 0.0133,-0.03559 0.017227,-0.04446 C 6.0127121,3.789698 5.750766,2.938499 5.0665137,2.5737 4.8642273,2.466034 4.6367344,2.409034 4.4084814,2.408147 4.1801018,2.409034 3.9526089,2.466037 3.7504492,2.5737 3.066197,2.938499 2.8042508,3.789698 3.1634768,4.475344 c 0.00393,0.0087 0.01026,0.02394 0.017227,0.04446 0.00177,0.0052 0.00367,0.01013 0.00557,0.01533 0.01102,0.02951 0.039647,0.111847 0.065613,0.238513 0.024067,0.11818 0.040533,0.250546 0.04788,0.387219 0.00393,0.06321 0.00532,0.135914 0.00418,0.22496 -5.066e-4,0.08449 -0.00165,0.17024 -0.00532,0.26258 -0.00329,0.102599 -0.00887,0.207859 -0.016847,0.313372 l -0.8772919,0 c -0.5117324,0 -0.9279584,0.416226 -0.9279584,0.927959 l 0,1.709364 c 0,0.134646 0.029387,0.262453 0.08132,0.378226 -0.051807,0.09323 -0.081573,0.200513 -0.081573,0.314893 0,0.933278 -0.076126,2.127236 -0.1884796,2.503689 C 1.0571435,11.985782 1.0131902,12.254315 1.0562568,12.453434 1.1748167,13 1.7477291,13 1.9359554,13 c 0.437506,0 1.226258,-0.07676 1.2595712,-0.08005 0.05092,-0.0051 0.1001932,-0.01596 0.1468065,-0.03179 0.049907,0.01241 0.1018398,0.01913 0.1546597,0.01925 l 0.9114918,0.0044 0.9114918,-0.0044 c 0.05282,-1.27e-4 0.1047532,-0.007 0.1546598,-0.01925 0.046613,0.01583 
0.095886,0.02673 0.1468064,0.03179 C 5.6547556,12.92315 6.4436346,13 6.8810138,13 c 0.1882264,0 0.7612654,0 0.8796986,-0.546566 0.043067,-0.199119 -7.6e-4,-0.467652 -0.2315463,-0.657525 z m -1.833117,0.502486 -0.3480794,-1.518478 -0.1741664,1.503658 -1.6846638,-7.6e-4 -0.3680927,-0.885399 0,0.900979 c 0,0 -1.7672504,0.173279 -1.3861111,0 0.3811394,-0.173154 0.3811394,-2.980082 0.3811394,-2.980082 l 2.2924095,0 2.2924095,0 c 0,0 0,2.806928 0.3811394,2.980082 0.381266,0.173279 -1.3859844,0 -1.3859844,0 z M 10.219055,1 7.3387864,1 5.8932688,5.377719 l 0.9449318,0 c 0.3536527,0 0.6674055,0.17138 0.8650052,0.434593 l 0.04864,-0.18392 0.9107318,-2.702555 0.2962729,-0.0016 0.9543051,2.889769 -2.2085564,0 C 7.839499,5.994632 7.9204389,6.217692 7.9204389,6.459878 l 0,1.257038 2.3962751,0 0.423193,1.60917 2.218563,0 L 10.219055,1 Z"/></svg>

Before

Width:  |  Height:  |  Size: 2.9 KiB

View file

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="utf-8"?><!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg width="800px" height="800px" viewBox="-0.5 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M8.93994 9.39998V5.48999C8.93994 5.20999 9.15994 4.98999 9.43994 4.98999H20.9999C21.2799 4.98999 21.4999 5.20999 21.4999 5.48999V13.09C21.4999 13.37 21.2799 13.59 20.9999 13.59L17.0599 13.6" stroke="#0F0F0F" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M17.7301 8.72998L16.4301 10.03" stroke="#0F0F0F" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M3 11.4H14.56C14.84 11.4 15.06 11.62 15.06 11.9V19.51C15.06 19.79 14.84 20.01 14.56 20.01H3C2.72 20.01 2.5 19.79 2.5 19.51V11.9C2.5 11.63 2.72 11.4 3 11.4Z" stroke="#0F0F0F" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"/>
<path d="M19.32 10.03V7.64001C19.32 7.36001 19.1 7.14001 18.82 7.14001H16.42" stroke="#0F0F0F" stroke-miterlimit="10" stroke-linecap="round" stroke-linejoin="round"/>
</svg>

Before

Width:  |  Height:  |  Size: 1.1 KiB

View file

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<svg width="800px" height="800px" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 7C12.5523 7 13 7.44772 13 8V13C13 13.5523 12.5523 14 12 14C11.4477 14 11 13.5523 11 13V8C11 7.44772 11.4477 7 12 7Z" fill="#FF8C00"/>
<path d="M12 17C12.5523 17 13 16.5523 13 16C13 15.4477 12.5523 15 12 15C11.4477 15 11 15.4477 11 16C11 16.5523 11.4477 17 12 17Z" fill="#FF8C00"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M12 2C6.47715 2 2 6.47715 2 12C2 17.5228 6.47715 22 12 22C17.5228 22 22 17.5228 22 12C22 6.47715 17.5228 2 12 2ZM4 12C4 7.58172 7.58172 4 12 4C16.4183 4 20 7.58172 20 12C20 16.4183 16.4183 20 12 20C7.58172 20 4 16.4183 4 12Z" fill="#FFD700"/>
</svg>

Before

Width:  |  Height:  |  Size: 733 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" width="48"><path d="M24 28.55q-2.75 0-4.6-1.95-1.85-1.95-1.85-4.75V9.55q0-2.65 1.875-4.525Q21.3 3.15 24 3.15t4.6 1.875Q30.5 6.9 30.5 9.55v12.3q0 2.8-1.875 4.75Q26.75 28.55 24 28.55Zm0-12.7ZM21.95 44.3v-6.95q-6-.65-9.95-5.05-3.95-4.4-3.95-10.45h4.15q0 4.85 3.45 8.2Q19.1 33.4 24 33.4q4.9 0 8.35-3.35 3.45-3.35 3.45-8.2h4.15q0 6.05-3.95 10.45-3.95 4.4-9.95 5.05v6.95ZM24 23.85q.8 0 1.275-.6.475-.6.475-1.4V9.55q0-.7-.525-1.2T24 7.85q-.7 0-1.225.5-.525.5-.525 1.2v12.3q0 .8.5 1.4.5.6 1.25.6Z"/></svg>

Before

Width:  |  Height:  |  Size: 550 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" width="48"><path d="M5.15 42.85v-16.7h4.7v8.65L34.8 9.85h-8.65v-4.7h16.7v16.7h-4.7V13.2L13.2 38.15h8.65v4.7Z"/></svg>

Before

Width:  |  Height:  |  Size: 170 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 0 24 24" width="24px" fill="#000000"><path d="M0 0h24v24H0z" fill="none"/><path d="M6 19h4V5H6v14zm8-14v14h4V5h-4z"/></svg>

Before

Width:  |  Height:  |  Size: 189 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="24px" viewBox="0 0 24 24" width="24px" fill="#000000"><path d="M0 0h24v24H0z" fill="none"/><path d="M8 5v14l11-7z"/></svg>

Before

Width:  |  Height:  |  Size: 171 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" viewBox="0 96 960 960" width="48"><path d="M388.218 873Q286 873 212.5 803.718 139 734.436 139 634q0-100.436 73.733-169.718Q286.466 395 388 395h253l-97-97 66-66 211 210-211 210-66-66 97-97H387q-63.019 0-108.01 41.5Q234 572 234 634t44.99 103.5Q323.981 779 387 779h307v94H388.218Z"/></svg>

Before

Width:  |  Height:  |  Size: 339 B

View file

@ -1,5 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<svg width="800px" height="800px" viewBox="0 0 16 16" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<rect width="16" height="16" id="icon-bound" fill="none" />
<path d="M2,8L8,8L8,14L6,14L6,11.4L1.4,16L-0.014,14.586L4.572,10L2,10L2,8ZM8,2L10,2L10,4.6L14.6,0L16.014,1.414L11.428,6L14,6L14,8L8,8L8,2Z" />
</svg>

Before

Width:  |  Height:  |  Size: 399 B

View file

@ -1,14 +0,0 @@
<?xml version="1.0" encoding="iso-8859-1"?>
<svg height="800px" width="800px" version="1.1" id="Capa_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 493.347 493.347" xml:space="preserve">
<g>
<path style="fill:#010002;" d="M191.936,385.946c-14.452,0-29.029-1.36-43.319-4.04l-5.299-0.996l-66.745,37.15v-63.207
l-6.629-4.427C25.496,320.716,0,277.045,0,230.617c0-85.648,86.102-155.33,191.936-155.33c17.077,0,33.623,1.838,49.394,5.239
c-50.486,27.298-84.008,74.801-84.008,128.765c0,72.969,61.25,134.147,142.942,149.464
C269.41,375.892,232.099,385.946,191.936,385.946z"/>
<path style="fill:#010002;" d="M437.777,304.278l-6.629,4.427v48.075l-50.933-28.343l-0.125,0.024l-5.167,0.967
c-11.444,2.142-23.104,3.228-34.673,3.228c-1.241,0-2.47-0.054-3.705-0.078c-82.707-1.599-149.387-56.268-149.387-123.287
c0-52.109,40.324-96.741,97.129-114.791c14.47-4.594,30.001-7.471,46.219-8.3c3.228-0.167,6.468-0.274,9.75-0.274
c84.413,0,153.092,55.343,153.092,123.365C493.347,246.053,473.089,280.679,437.777,304.278z"/>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 1 KiB

View file

@ -1,23 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg fill="#000000" height="800px" width="800px" version="1.1" id="anna_vital_language_icon" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink"
viewBox="0 0 256 256" enable-background="new 0 0 256 256" xml:space="preserve">
<path d="M62.4,101c-1.5-2.1-2.1-3.4-1.8-3.9c0.2-0.5,1.6-0.7,3.9-0.5c2.3,0.2,4.2,0.5,5.8,0.9c1.5,0.4,2.8,1,3.8,1.7
c1,0.7,1.8,1.5,2.3,2.6c0.6,1,1,2.3,1.4,3.7c0.7,2.8,0.5,4.7-0.5,5.7c-1.1,1-2.6,0.8-4.6-0.6c-2.1-1.4-3.9-2.8-5.5-4.2
C65.5,105.1,63.9,103.2,62.4,101z M40.7,190.1c4.8-2.1,9-4.2,12.6-6.4c3.5-2.1,6.6-4.4,9.3-6.8c2.6-2.3,5-4.9,7-7.7
c2-2.7,3.8-5.8,5.4-9.2c1.3,1.2,2.5,2.4,3.8,3.5c1.2,1.1,2.5,2.2,3.8,3.4c1.3,1.2,2.8,2.4,4.3,3.8c1.5,1.4,3.3,2.8,5.3,4.5
c0.7,0.5,1.4,0.9,2.1,1c0.7,0.1,1.7,0,3.1-0.6c1.3-0.5,3-1.4,5.1-2.8c2.1-1.3,4.7-3.1,7.9-5.4c1.6-1.1,2.4-2,2.3-2.7
c-0.1-0.7-1-1-2.7-0.9c-3.1,0.1-5.9,0.1-8.3-0.1c-2.5-0.2-5-0.6-7.4-1.4c-2.4-0.8-4.9-1.9-7.5-3.4c-2.6-1.5-5.6-3.6-9.1-6.2
c1-3.9,1.8-8,2.4-12.4c0.3-2.5,0.6-4.3,0.8-5.6c0.2-1.2,0.5-2.4,0.9-3.3c0.3-0.8,0.4-1.4,0.5-1.9c0.1-0.5-0.1-1-0.4-1.6
c-0.4-0.5-1-1.1-1.9-1.7c-0.9-0.6-2.2-1.4-3.9-2.3c2.4-0.9,5.1-1.7,7.9-2.6c2.7-0.9,5.7-1.8,8.8-2.7c3-0.9,4.5-1.9,4.6-3.1
c0.1-1.2-0.9-2.3-3.2-3.5c-1.5-0.8-2.9-1.1-4.3-0.9c-1.4,0.2-3.2,0.9-5.4,2.2c-0.6,0.4-1.8,0.9-3.4,1.6c-1.7,0.7-3.6,1.5-6,2.5
c-2.4,1-5,2-7.8,3.1c-2.9,1.1-5.8,2.2-8.7,3.2c-2.9,1.1-5.7,2-8.2,2.8c-2.6,0.8-4.6,1.4-6.1,1.6c-3.8,0.8-5.8,1.6-5.9,2.4
c0,0.8,1.5,1.6,4.4,2.4c1.2,0.3,2.3,0.6,3.1,0.6c0.8,0.1,1.7,0.1,2.5,0c0.8-0.1,1.6-0.3,2.4-0.5c0.8-0.3,1.7-0.7,2.8-1.1
c1.6-0.8,3.9-1.7,6.9-2.8c2.9-1,6.6-2.4,11.2-4c0.9,2.7,1.4,6,1.4,9.8c0,3.8-0.4,8.1-1.4,13c-1.3-1.1-2.7-2.3-4.2-3.6
c-1.5-1.3-2.9-2.6-4.3-3.9c-1.6-1.5-3.2-2.5-4.7-3c-1.6-0.5-3.4-0.5-5.5,0c-3.3,0.9-5,1.9-4.9,3.1c0,1.2,1.3,1.8,3.8,1.9
c0.9,0.1,1.8,0.3,2.7,0.6c0.9,0.3,1.9,0.9,3.2,1.8c1.3,0.9,2.9,2.2,4.7,3.8c1.8,1.6,4.2,3.7,7,6.3c-1.2,2.9-2.6,5.6-4.1,8
c-1.5,2.5-3.4,5-5.5,7.3c-2.2,2.4-4.7,4.8-7.7,7.2c-3,2.5-6.6,5.1-10.8,7.8c-4.3,2.8-6.5,4.7-6.5,5.6C35,192.1,37,191.7,40.7,190.1z
M250.5,81.8v165.3l-111.6-36.4L10.5,253.4V76.1l29.9-10V10.4l81.2,28.7L231.3,2.6v73.1L250.5,81.8z M124.2,50.6L22.3,84.6v152.2
l101.9-33.9V50.6L124.2,50.6z M219.4,71.9V19L138.1,46L219.4,71.9z M227,201.9L196.5,92L176,85.6l-30.9,90.8l18.9,5.9l5.8-18.7
l31.9,10l5.7,22.3L227,201.9z M174.8,147.7l22.2,6.9l-10.9-42.9L174.8,147.7z"/>
</svg>

Before

Width:  |  Height:  |  Size: 2.4 KiB

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" viewBox="0 96 960 960" width="48"><path d="M267 873v-94h306q63.423 0 108.712-41.5Q727 696 727 634t-45.288-103.5Q636.423 489 573 489H319l97 97-66 66-211-210 211-210 66 66-97 97h253q101.534 0 175.267 69.282Q821 533.564 821 634q0 100.436-73.733 169.718Q673.534 873 572 873H267Z"/></svg>

Before

Width:  |  Height:  |  Size: 336 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" viewBox="0 -960 960 960" width="48"><path d="M160-200v-60h640v60H160Zm320-136L280-536l42-42 128 128v-310h60v310l128-128 42 42-200 200Z" transform="rotate(180 480 -480)"/></svg>

Before

Width:  |  Height:  |  Size: 229 B

View file

@ -1,19 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Uploaded to: SVG Repo, www.svgrepo.com, Generator: SVG Repo Mixer Tools -->
<svg width="800px" height="800px" viewBox="0 -0.5 21 21" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<title>url [#1423]</title>
<desc>Created with Sketch.</desc>
<defs>
</defs>
<g id="Page-1" stroke="none" stroke-width="1" fill="none" fill-rule="evenodd">
<g id="Dribbble-Light-Preview" transform="translate(-339.000000, -600.000000)" fill="#000000">
<g id="icons" transform="translate(56.000000, 160.000000)">
<path d="M286.388001,443.226668 C288.054626,441.639407 290.765027,441.639407 292.431651,443.226668 L293.942296,444.665378 L295.452942,443.226668 L293.942296,441.787958 C291.439155,439.404014 287.380498,439.404014 284.877356,441.787958 C282.374215,444.171902 282.374215,448.03729 284.877356,450.421235 L286.388001,451.859945 L287.898647,450.421235 L286.388001,448.982525 C284.721377,447.395264 284.721377,444.813929 286.388001,443.226668 L286.388001,443.226668 Z M302.122644,449.578765 L300.611999,448.139038 L299.101353,449.578765 L300.611999,451.017475 C302.277554,452.603719 302.277554,455.186071 300.611999,456.773332 C298.945374,458.359576 296.233905,458.359576 294.568349,456.773332 L293.057704,455.333605 L291.54599,456.773332 L293.057704,458.212042 C295.560845,460.595986 299.619502,460.595986 302.122644,458.212042 C304.625785,455.828098 304.625785,451.96271 302.122644,449.578765 L302.122644,449.578765 Z M288.653969,443.946023 L299.856676,454.61425 L298.344962,456.053977 L287.143324,445.384733 L288.653969,443.946023 Z" id="url-[#1423]">
</path>
</g>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 1.7 KiB

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" viewBox="0 96 960 960" width="48"><path d="M480.118 726Q551 726 600.5 676.382q49.5-49.617 49.5-120.5Q650 485 600.382 435.5q-49.617-49.5-120.5-49.5Q409 386 359.5 435.618q-49.5 49.617-49.5 120.5Q310 627 359.618 676.5q49.617 49.5 120.5 49.5ZM480 652q-40 0-68-28t-28-68q0-40 28-68t68-28q40 0 68 28t28 68q0 40-28 68t-68 28Zm0 227q-154 0-278-90T17 556q61-143 185-233t278-90q154 0 278 90t185 233q-61 143-185 233t-278 90Zm0-323Zm-.08 240q120.454 0 221.267-65.5T855 556q-53-109-153.733-174.5Q600.533 316 480.08 316q-120.454 0-221.267 65.5T104 556q54 109 154.733 174.5Q359.467 796 479.92 796Z"/></svg>

Before

Width:  |  Height:  |  Size: 644 B

View file

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" height="48" viewBox="0 96 960 960" width="48"><path d="m634 624-77-76q21-44-12.5-69t-62.5-6l-70-71q15-8 33-12t35-4q71 0 120.5 49.5T650 556q0 16-4 36t-12 32Zm148 150-55-56q44-35 76.5-77t52.5-85q-52-110-150.5-175T490 316q-42 0-82 7t-59 16l-66-66q35-16 92.5-28T485 233q147 0 272 85t186 238q-25 66-67.5 121.5T782 774Zm25 227L653 849q-35 14-80 22t-93 8q-151 0-276.5-85.5T17 556q18-51 55-103t86-100L35 232l50-52 769 769-47 52ZM216 410q-36 29-66 68.5T104 556q52 111 153 175.5T488 796q27 0 55.5-3t44.5-11l-64-64q-8 4-20.5 6t-23.5 2q-70 0-120-49t-50-121q0-11 1.5-23t4.5-21L216 410Zm323 128Zm-137 69Z"/></svg>

Before

Width:  |  Height:  |  Size: 640 B

View file

@ -1,102 +0,0 @@
import faulthandler
import logging
import multiprocessing
import os
import platform
import sys
from pathlib import Path
from typing import TextIO
# Set up CUDA library paths before any torch imports
# This must happen before platformdirs or any other imports that might indirectly load torch
import buzz.cuda_setup # noqa: F401
from platformdirs import user_log_dir, user_cache_dir, user_data_dir
# Will download all Huggingface data to the app cache directory.
# Must run before any library that reads HF_HOME is imported.
os.environ.setdefault("HF_HOME", user_cache_dir("Buzz"))
from buzz.assets import APP_BASE_DIR
# Check for segfaults if not running in frozen mode
# Note: On Windows, faulthandler can print "Windows fatal exception" messages
# for non-fatal RPC errors (0x800706be) during multiprocessing operations.
# These are usually harmless but noisy, so we disable faulthandler on Windows.
if getattr(sys, "frozen", False) is False and platform.system() != "Windows":
    faulthandler.enable()
# Sets stdout/stderr to no-op TextIO when None (run as Windows GUI with --noconsole).
# stdout fix: torch.hub uses sys.stdout.write() for download progress and crashes if None.
# stderr fix: Resolves https://github.com/chidiwilliams/buzz/issues/221
if sys.stdout is None:
    sys.stdout = TextIO()
if sys.stderr is None:
    sys.stderr = TextIO()
# Adds the current directory to the PATH, so the ffmpeg binary gets picked up:
# https://stackoverflow.com/a/44352931/9830227
os.environ["PATH"] += os.pathsep + APP_BASE_DIR
# Add the app directory to the DLL list: https://stackoverflow.com/a/64303856
if platform.system() == "Windows":
    os.add_dll_directory(APP_BASE_DIR)
    # Optional DLL directories shipped by the installer; registered only when present.
    dll_backup_dir = os.path.join(APP_BASE_DIR, "dll_backup")
    if os.path.isdir(dll_backup_dir):
        os.add_dll_directory(dll_backup_dir)
    onnx_dll_dir = os.path.join(APP_BASE_DIR, "onnxruntime", "capi")
    if os.path.isdir(onnx_dll_dir):
        os.add_dll_directory(onnx_dll_dir)
def main():
    """Configure multiprocessing and logging, then start the Qt application.

    Exits the process with the application's exit code via ``sys.exit``.
    """
    if platform.system() == "Linux":
        # NOTE(review): "spawn" presumably avoids fork()-related issues with
        # threaded libraries on Linux — confirm rationale with the history.
        multiprocessing.set_start_method("spawn")
    # Fixes opening new window when app has been frozen on Windows:
    # https://stackoverflow.com/a/33979091
    multiprocessing.freeze_support()
    # All log output goes to logs.txt in the per-user log directory.
    log_dir = user_log_dir(appname="Buzz")
    os.makedirs(log_dir, exist_ok=True)
    log_format = (
        "[%(asctime)s] %(module)s.%(funcName)s:%(lineno)d %(levelname)s -> %(message)s"
    )
    logging.basicConfig(
        filename=os.path.join(log_dir, "logs.txt"),
        level=logging.DEBUG,
        format=log_format,
    )
    # Silence noisy third-party library loggers
    logging.getLogger("matplotlib").setLevel(logging.WARNING)
    logging.getLogger("graphviz").setLevel(logging.WARNING)
    logging.getLogger("nemo_logger").setLevel(logging.ERROR)
    logging.getLogger("nemo_logging").setLevel(logging.ERROR)
    logging.getLogger("numba").setLevel(logging.WARNING)
    logging.getLogger("torio._extension.utils").setLevel(logging.WARNING)
    logging.getLogger("export_config_manager").setLevel(logging.WARNING)
    logging.getLogger("training_telemetry_provider").setLevel(logging.ERROR)
    logging.getLogger("default_recorder").setLevel(logging.WARNING)
    logging.getLogger("config").setLevel(logging.WARNING)
    # When running from source (not frozen), mirror the log file to stdout.
    if getattr(sys, "frozen", False) is False:
        stdout_handler = logging.StreamHandler(sys.stdout)
        stdout_handler.setLevel(logging.DEBUG)
        stdout_handler.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(stdout_handler)
    # Imported here (not at module level) so logging is configured first.
    from buzz.cli import parse_command_line
    from buzz.widgets.application import Application
    logging.debug("app_dir: %s", APP_BASE_DIR)
    logging.debug("log_dir: %s", log_dir)
    logging.debug("cache_dir: %s", user_cache_dir("Buzz"))
    logging.debug("data_dir: %s", user_data_dir("Buzz"))
    app = Application(sys.argv)
    parse_command_line(app)
    app.show_main_window()
    sys.exit(app.exec())

View file

@ -1,79 +0,0 @@
import json
import logging
import os
import pickle
from typing import List
from platformdirs import user_cache_dir
from buzz.transcriber.transcriber import FileTranscriptionTask
class TasksCache:
    """Persists the list of file-transcription tasks between application runs.

    Current format: a JSON index file (``tasks.json``) holding task IDs, plus
    one JSON file per task under ``transcriptions/``. An older single-file
    pickle cache (``tasks``) is still read as a fallback and deleted when it
    turns out to be corrupted.
    """

    def __init__(self, cache_dir=user_cache_dir("Buzz")):
        os.makedirs(cache_dir, exist_ok=True)
        self.cache_dir = cache_dir
        # Legacy single-file pickle cache (pre-JSON format).
        self.pickle_cache_file_path = os.path.join(cache_dir, "tasks")
        # JSON index file listing the IDs of the cached tasks.
        self.tasks_list_file_path = os.path.join(cache_dir, "tasks.json")

    def save(self, tasks: List[FileTranscriptionTask]):
        """Write all tasks to the JSON cache."""
        self.save_json_tasks(tasks=tasks)

    def load(self) -> List[FileTranscriptionTask]:
        """Load cached tasks, preferring the JSON format over the legacy pickle.

        Returns an empty list when no cache exists or the legacy cache is
        corrupted (in which case the corrupted file is deleted).
        """
        if os.path.exists(self.tasks_list_file_path):
            return self.load_json_tasks()
        try:
            # Trusted local cache written by this app; pickle is acceptable here.
            with open(self.pickle_cache_file_path, "rb") as file:
                return pickle.load(file)
        except FileNotFoundError:
            return []
        except (
            pickle.UnpicklingError,
            EOFError,  # truncated pickle file
            AttributeError,
            ValueError,
        ):  # delete corrupted cache
            os.remove(self.pickle_cache_file_path)
            return []

    def load_json_tasks(self) -> List[FileTranscriptionTask]:
        """Read the JSON index and load each task file it references.

        Missing or unparseable task files are skipped silently.
        """
        task_ids: List[int]
        try:
            with open(self.tasks_list_file_path) as file:
                task_ids = json.load(file)
        except json.JSONDecodeError:
            logging.debug(
                "Got JSONDecodeError while reading tasks list file path, "
                "resetting cache..."
            )
            task_ids = []
        tasks = []
        for task_id in task_ids:
            try:
                with open(self.get_task_path(task_id=task_id)) as file:
                    tasks.append(FileTranscriptionTask.from_json(file.read()))
            except (FileNotFoundError, json.JSONDecodeError):
                pass
        return tasks

    def save_json_tasks(self, tasks: List[FileTranscriptionTask]):
        """Write the ID index, then one JSON file per task."""
        json_str = json.dumps([task.id for task in tasks])
        with open(self.tasks_list_file_path, "w") as file:
            file.write(json_str)
        for task in tasks:
            file_path = self.get_task_path(task_id=task.id)
            json_str = task.to_json()
            with open(file_path, "w") as file:
                file.write(json_str)

    def get_task_path(self, task_id: int):
        """Return the path of the JSON file for ``task_id``, creating parents."""
        path = os.path.join(self.cache_dir, "transcriptions", f"{task_id}.json")
        os.makedirs(os.path.dirname(path), exist_ok=True)
        return path

    def clear(self):
        """Delete the legacy pickle cache file if it exists."""
        if os.path.exists(self.pickle_cache_file_path):
            os.remove(self.pickle_cache_file_path)

View file

@ -1,253 +0,0 @@
import enum
import sys
import typing
import urllib.parse
from PyQt6.QtCore import QCommandLineParser, QCommandLineOption
from buzz.model_loader import (
ModelType,
WhisperModelSize,
TranscriptionModel,
ModelDownloader,
)
from buzz.store.keyring_store import get_password, Key
from buzz.transcriber.transcriber import (
Task,
FileTranscriptionTask,
FileTranscriptionOptions,
TranscriptionOptions,
LANGUAGES,
OutputFormat,
)
from buzz.widgets.application import Application
class CommandLineError(Exception):
    """Raised when the command-line arguments cannot be parsed or are invalid."""

    def __init__(self, message: str):
        super().__init__(message)
class CommandLineModelType(enum.Enum):
    """Model-type names accepted by the CLI ``--model-type`` option."""

    WHISPER = "whisper"
    WHISPER_CPP = "whispercpp"
    HUGGING_FACE = "huggingface"
    FASTER_WHISPER = "fasterwhisper"
    OPEN_AI_WHISPER_API = "openaiapi"
def parse_command_line(app: Application):
    """Parse the application's CLI arguments.

    On invalid input, prints the error and help text, then exits with code 1.
    """
    parser = QCommandLineParser()
    try:
        parse(app, parser)
    except CommandLineError as exc:
        sys.stderr.write(f"Error: {exc}\n\n")
        print(parser.helpText())
        sys.exit(1)
def is_url(path: str) -> bool:
    """Return True when ``path`` parses as a URL with both scheme and host."""
    parts = urllib.parse.urlparse(path)
    return bool(parts.scheme) and bool(parts.netloc)
def parse(app: Application, parser: QCommandLineParser) -> None:
    """Parse and act on Buzz's CLI arguments.

    With no positional arguments, only installs -h/-v handling and returns.
    With the "add" command, registers all transcription options, validates
    them, downloads the requested model, and enqueues one transcription task
    per input file/URL on `app`.

    Raises:
        CommandLineError: on an invalid option value or missing inputs.
    """
    parser.addPositionalArgument("<command>", "One of the following commands:\n- add")
    # First pass only discovers the command word; per-command options are added below.
    parser.parse(app.arguments())
    args = parser.positionalArguments()
    if len(args) == 0:
        parser.addHelpOption()
        parser.addVersionOption()
        parser.process(app)
        return
    command = args[0]
    # NOTE(review): any command other than "add" falls through silently —
    # no error is raised for an unknown command.
    if command == "add":
        parser.clearPositionalArguments()
        parser.addPositionalArgument("files", "Input file paths", "[file file file...]")
        task_option = QCommandLineOption(
            ["t", "task"],
            f"The task to perform. Allowed: {join_values(Task)}. Default: {Task.TRANSCRIBE.value}.",
            "task",
            Task.TRANSCRIBE.value,
        )
        model_type_option = QCommandLineOption(
            ["m", "model-type"],
            f"Model type. Allowed: {join_values(CommandLineModelType)}. Default: {CommandLineModelType.WHISPER.value}.",
            "model-type",
            CommandLineModelType.WHISPER.value,
        )
        model_size_option = QCommandLineOption(
            ["s", "model-size"],
            f"Model size. Use only when --model-type is whisper, whispercpp, or fasterwhisper. Allowed: {join_values(WhisperModelSize)}. Default: {WhisperModelSize.TINY.value}.",
            "model-size",
            WhisperModelSize.TINY.value,
        )
        hugging_face_model_id_option = QCommandLineOption(
            ["hfid"],
            'Hugging Face model ID. Use only when --model-type is huggingface. Example: "openai/whisper-tiny"',
            "id",
        )
        language_option = QCommandLineOption(
            ["l", "language"],
            f'Language code. Allowed: {", ".join(sorted([k + " (" + LANGUAGES[k].title() + ")" for k in LANGUAGES]))}. Leave empty to detect language.',
            "code",
            "",
        )
        initial_prompt_option = QCommandLineOption(
            ["p", "prompt"], "Initial prompt.", "prompt", ""
        )
        word_timestamp_option = QCommandLineOption(
            ["w", "word-timestamps"], "Generate word-level timestamps."
        )
        extract_speech_option = QCommandLineOption(
            ["e", "extract-speech"], "Extract speech from audio before transcribing."
        )
        open_ai_access_token_option = QCommandLineOption(
            "openai-token",
            f"OpenAI access token. Use only when --model-type is {CommandLineModelType.OPEN_AI_WHISPER_API.value}. Defaults to your previously saved access token, if one exists.",
            "token",
        )
        output_directory_option = QCommandLineOption(
            ["d", "output-directory"], "Output directory", "directory"
        )
        srt_option = QCommandLineOption(["srt"], "Output result in an SRT file.")
        vtt_option = QCommandLineOption(["vtt"], "Output result in a VTT file.")
        txt_option = QCommandLineOption("txt", "Output result in a TXT file.")
        hide_gui_option = QCommandLineOption("hide-gui", "Hide the main application window.")
        parser.addOptions(
            [
                task_option,
                model_type_option,
                model_size_option,
                hugging_face_model_id_option,
                language_option,
                initial_prompt_option,
                word_timestamp_option,
                extract_speech_option,
                open_ai_access_token_option,
                output_directory_option,
                srt_option,
                vtt_option,
                txt_option,
                hide_gui_option,
            ]
        )
        parser.addHelpOption()
        parser.addVersionOption()
        parser.process(app)
        # slice after first argument, the command
        file_paths = parser.positionalArguments()[1:]
        if len(file_paths) == 0:
            raise CommandLineError("No input files")
        task = parse_enum_option(task_option, parser, Task)
        model_type = parse_enum_option(model_type_option, parser, CommandLineModelType)
        model_size = parse_enum_option(model_size_option, parser, WhisperModelSize)
        hugging_face_model_id = parser.value(hugging_face_model_id_option)
        if (
            hugging_face_model_id == ""
            and model_type == CommandLineModelType.HUGGING_FACE
        ):
            raise CommandLineError(
                "--hfid is required when --model-type is huggingface"
            )
        # CommandLineModelType member names mirror ModelType member names.
        model = TranscriptionModel(
            model_type=ModelType[model_type.name],
            whisper_model_size=model_size,
            hugging_face_model_id=hugging_face_model_id,
        )
        # Blocking download; presumably a no-op when the model is already
        # cached locally — confirm against ModelDownloader.
        ModelDownloader(model=model).run()
        model_path = model.get_local_model_path()
        if model_path is None:
            raise CommandLineError("Model not found")
        language = parser.value(language_option)
        if language == "":
            language = None
        elif LANGUAGES.get(language) is None:
            raise CommandLineError("Invalid language option")
        initial_prompt = parser.value(initial_prompt_option)
        word_timestamps = parser.isSet(word_timestamp_option)
        extract_speech = parser.isSet(extract_speech_option)
        output_formats: typing.Set[OutputFormat] = set()
        if parser.isSet(srt_option):
            output_formats.add(OutputFormat.SRT)
        if parser.isSet(vtt_option):
            output_formats.add(OutputFormat.VTT)
        if parser.isSet(txt_option):
            output_formats.add(OutputFormat.TXT)
        openai_access_token = parser.value(open_ai_access_token_option)
        # Fall back to the keyring-stored token only when the API backend needs one.
        if (
            model.model_type == ModelType.OPEN_AI_WHISPER_API
            and openai_access_token == ""
        ):
            openai_access_token = get_password(key=Key.OPENAI_API_KEY)
            if openai_access_token == "":
                raise CommandLineError("No OpenAI access token found")
        output_directory = parser.value(output_directory_option)
        transcription_options = TranscriptionOptions(
            model=model,
            task=task,
            language=language,
            initial_prompt=initial_prompt,
            word_level_timings=word_timestamps,
            extract_speech=extract_speech,
            openai_access_token=openai_access_token,
        )
        for file_path in file_paths:
            path_is_url = is_url(file_path)
            file_transcription_options = FileTranscriptionOptions(
                file_paths=[file_path] if not path_is_url else None,
                url=file_path if path_is_url else None,
                output_formats=output_formats,
            )
            transcription_task = FileTranscriptionTask(
                file_path=file_path if not path_is_url else None,
                url=file_path if path_is_url else None,
                source=FileTranscriptionTask.Source.FILE_IMPORT if not path_is_url else FileTranscriptionTask.Source.URL_IMPORT,
                model_path=model_path,
                transcription_options=transcription_options,
                file_transcription_options=file_transcription_options,
                output_directory=output_directory if output_directory != "" else None,
            )
            app.add_task(transcription_task, quit_on_complete=True)
        if parser.isSet(hide_gui_option):
            app.hide_main_window = True
T = typing.TypeVar("T", bound=enum.Enum)


def parse_enum_option(
    option: QCommandLineOption, parser: QCommandLineParser, enum_class: typing.Type[T]
) -> T:
    """Read `option`'s value from `parser` and coerce it to a member of `enum_class`.

    Raises:
        CommandLineError: when the value is not a valid member value.
    """
    try:
        return enum_class(parser.value(option))
    except ValueError:
        # The last entry in names() is the long option form, e.g. "model-type".
        raise CommandLineError(f"Invalid value for --{option.names()[-1]} option.")
def join_values(enum_class: typing.Type[enum.Enum]) -> str:
    """Render every member value of `enum_class` as a comma-separated string."""
    member_values = (member.value for member in enum_class)
    return ", ".join(member_values)

View file

@ -1,130 +0,0 @@
"""
CUDA library path setup for nvidia packages installed via pip.
This module must be imported BEFORE any torch or CUDA-dependent libraries are imported.
It handles locating and loading CUDA libraries (cuDNN, cuBLAS, etc.) from the nvidia
pip packages.
On Windows: Uses os.add_dll_directory() to add library paths
On Linux: Uses ctypes to preload libraries (LD_LIBRARY_PATH is read at process start)
On macOS: No action needed (CUDA not supported)
"""
import ctypes
import logging
import os
import platform
import sys
from pathlib import Path
logger = logging.getLogger(__name__)
def _get_nvidia_package_lib_dirs() -> list[Path]:
"""Find all nvidia package library directories in site-packages."""
lib_dirs = []
# Find site-packages directories
site_packages_dirs = []
for path in sys.path:
if "site-packages" in path:
site_packages_dirs.append(Path(path))
# Also check relative to the current module for frozen apps
if getattr(sys, "frozen", False):
# For frozen apps, check the _internal directory
frozen_lib_dir = Path(sys._MEIPASS) if hasattr(sys, "_MEIPASS") else Path(sys.executable).parent
nvidia_dir = frozen_lib_dir / "nvidia"
if nvidia_dir.exists():
for pkg_dir in nvidia_dir.iterdir():
if pkg_dir.is_dir():
lib_subdir = pkg_dir / "lib"
if lib_subdir.exists():
lib_dirs.append(lib_subdir)
# Some packages have bin directory on Windows
bin_subdir = pkg_dir / "bin"
if bin_subdir.exists():
lib_dirs.append(bin_subdir)
# Check each site-packages for nvidia packages
for sp_dir in site_packages_dirs:
nvidia_dir = sp_dir / "nvidia"
if nvidia_dir.exists():
for pkg_dir in nvidia_dir.iterdir():
if pkg_dir.is_dir():
lib_subdir = pkg_dir / "lib"
if lib_subdir.exists():
lib_dirs.append(lib_subdir)
# Some packages have bin directory on Windows
bin_subdir = pkg_dir / "bin"
if bin_subdir.exists():
lib_dirs.append(bin_subdir)
return lib_dirs
def _setup_windows_dll_directories():
    """Add nvidia library directories to the Windows DLL search path.

    Best-effort: a path the OS rejects (OSError) or a Python without
    os.add_dll_directory (AttributeError) is silently skipped.
    """
    lib_dirs = _get_nvidia_package_lib_dirs()
    for lib_dir in lib_dirs:
        try:
            os.add_dll_directory(str(lib_dir))
        except (OSError, AttributeError):
            # Unused exception detail dropped; failures are intentionally ignored.
            pass
def _preload_linux_libraries():
    """Preload CUDA libraries on Linux using ctypes.

    On Linux, LD_LIBRARY_PATH is only read at process start, so the
    libraries are loaded manually with ctypes before torch tries to
    resolve them.
    """
    lib_dirs = _get_nvidia_package_lib_dirs()
    # Libraries to skip - NVBLAS requires special configuration and causes issues
    skip_patterns = ["libnvblas"]
    loaded_libs = set()
    for lib_dir in lib_dirs:
        if not lib_dir.exists():
            continue
        # Find all .so files in the directory
        for lib_file in sorted(lib_dir.glob("*.so*")):
            if lib_file.name in loaded_libs:
                continue
            # Skip dangling symlinks.
            if lib_file.is_symlink() and not lib_file.exists():
                continue
            # Skip problematic libraries
            if any(pattern in lib_file.name for pattern in skip_patterns):
                continue
            try:
                # Use RTLD_GLOBAL so symbols are available to other libraries
                ctypes.CDLL(str(lib_file), mode=ctypes.RTLD_GLOBAL)
                loaded_libs.add(lib_file.name)
            except OSError:
                # Some libraries may have missing dependencies, that's ok
                # (unused exception binding removed).
                pass
def setup_cuda_libraries():
    """Set up CUDA library paths for the current platform.

    Must run before torch or any CUDA-dependent library is imported.
    macOS has no CUDA support, so nothing happens there.
    """
    current_system = platform.system()
    if current_system == "Windows":
        _setup_windows_dll_directories()
    elif current_system == "Linux":
        _preload_linux_libraries()


# Auto-run setup when this module is imported
setup_cuda_libraries()

View file

View file

@ -1,63 +0,0 @@
# Adapted from https://github.com/zhiyiYo/Groove
from abc import ABC
from typing import TypeVar, Generic, Any, Type, List
from PyQt6.QtSql import QSqlDatabase, QSqlQuery, QSqlRecord
from buzz.db.entity.entity import Entity
T = TypeVar("T", bound=Entity)
class DAO(ABC, Generic[T]):
    """Generic data-access object for a single SQL table (adapted from Groove).

    Subclasses set `entity` (the dataclass a row maps to) and may override
    `ignore_fields` to keep entity attributes out of INSERT statements.
    """

    entity: Type[T]
    # Entity attributes with no bindable column (e.g. database-assigned IDs).
    ignore_fields = []

    def __init__(self, table: str, db: QSqlDatabase):
        self.db = db
        self.table = table

    def insert(self, record: T):
        """INSERT `record`, binding every non-ignored entity attribute by name."""
        query = self._create_query()
        fields = [
            field for field in record.__dict__.keys() if field not in self.ignore_fields
        ]
        query.prepare(
            f"""
            INSERT INTO {self.table} ({", ".join(fields)})
            VALUES ({", ".join([f":{key}" for key in fields])})
            """
        )
        for field in fields:
            query.bindValue(f":{field}", getattr(record, field))
        if not query.exec():
            raise Exception(query.lastError().text())

    def find_by_id(self, id: Any) -> T | None:
        """Return the row with primary key `id` as an entity, or None."""
        query = self._create_query()
        query.prepare(f"SELECT * FROM {self.table} WHERE id = :id")
        query.bindValue(":id", id)
        return self._execute(query)

    def to_entity(self, record: QSqlRecord) -> T:
        """Map a Qt record's columns onto the entity's keyword arguments."""
        kwargs = {record.fieldName(i): record.value(i) for i in range(record.count())}
        return self.entity(**kwargs)

    def _execute(self, query: QSqlQuery) -> T | None:
        """Run `query`; return the first row as an entity, or None if empty."""
        if not query.exec():
            raise Exception(query.lastError().text())
        if not query.first():
            return None
        return self.to_entity(query.record())

    def _execute_all(self, query: QSqlQuery) -> List[T]:
        """Run `query`; return every row as an entity (empty list if none)."""
        if not query.exec():
            raise Exception(query.lastError().text())
        entities = []
        while query.next():
            entities.append(self.to_entity(query.record()))
        return entities

    def _create_query(self):
        # New query bound to this DAO's connection.
        return QSqlQuery(self.db)

View file

@ -1,320 +0,0 @@
import uuid
from datetime import datetime
from uuid import UUID
from PyQt6.QtSql import QSqlDatabase
from buzz.db.dao.dao import DAO
from buzz.db.entity.transcription import Transcription
from buzz.transcriber.transcriber import FileTranscriptionTask
class TranscriptionDAO(DAO[Transcription]):
    """Data-access object for the `transcription` table."""

    entity = Transcription

    # Shared by create_transcription() and copy_transcription() so the
    # column list cannot drift between the two inserts.
    _INSERT_SQL = """
        INSERT INTO transcription (
            id, export_formats, file, output_folder, language, model_type,
            source, status, task, time_queued, url, whisper_model_size,
            hugging_face_model_id, word_level_timings, extract_speech,
            name, notes
        ) VALUES (
            :id, :export_formats, :file, :output_folder, :language, :model_type,
            :source, :status, :task, :time_queued, :url, :whisper_model_size,
            :hugging_face_model_id, :word_level_timings, :extract_speech,
            :name, :notes
        )
    """

    def __init__(self, db: QSqlDatabase):
        super().__init__("transcription", db)

    @staticmethod
    def _exec_or_raise(query):
        # Centralized translation of QSqlQuery failures into exceptions.
        if not query.exec():
            raise Exception(query.lastError().text())

    def create_transcription(self, task: FileTranscriptionTask):
        """Insert a new QUEUED row built from `task`."""
        query = self._create_query()
        query.prepare(self._INSERT_SQL)
        query.bindValue(":id", str(task.uid))
        query.bindValue(
            ":export_formats",
            ", ".join(
                [
                    output_format.value
                    for output_format in task.file_transcription_options.output_formats
                ]
            ),
        )
        query.bindValue(":file", task.file_path)
        query.bindValue(":output_folder", task.output_directory)
        query.bindValue(":language", task.transcription_options.language)
        query.bindValue(
            ":model_type", task.transcription_options.model.model_type.value
        )
        query.bindValue(":source", task.source.value)
        query.bindValue(":status", FileTranscriptionTask.Status.QUEUED.value)
        query.bindValue(":task", task.transcription_options.task.value)
        query.bindValue(":time_queued", datetime.now().isoformat())
        query.bindValue(":url", task.url)
        model = task.transcription_options.model
        query.bindValue(
            ":whisper_model_size",
            model.whisper_model_size.value if model.whisper_model_size else None,
        )
        query.bindValue(
            ":hugging_face_model_id",
            model.hugging_face_model_id if model.hugging_face_model_id else None,
        )
        query.bindValue(
            ":word_level_timings", task.transcription_options.word_level_timings
        )
        query.bindValue(":extract_speech", task.transcription_options.extract_speech)
        # name/notes are user-editable later; not part of FileTranscriptionTask.
        query.bindValue(":name", None)
        query.bindValue(":notes", None)
        self._exec_or_raise(query)

    def copy_transcription(self, id: UUID) -> UUID:
        """Duplicate row `id` as a fresh QUEUED transcription; return the new id.

        Raises:
            Exception: if the source row does not exist or a query fails.
        """
        query = self._create_query()
        query.prepare("SELECT * FROM transcription WHERE id = :id")
        query.bindValue(":id", str(id))
        self._exec_or_raise(query)
        if not query.next():
            raise Exception("Transcription not found")
        transcription_data = {
            field.name: query.value(field.name)
            for field in self.entity.__dataclass_fields__.values()
        }
        new_id = uuid.uuid4()
        transcription_data["id"] = str(new_id)
        transcription_data["time_queued"] = datetime.now().isoformat()
        transcription_data["status"] = FileTranscriptionTask.Status.QUEUED.value
        query.prepare(self._INSERT_SQL)
        # Extra entity fields (e.g. progress) bind to placeholders the SQL
        # doesn't use; Qt ignores them, matching the original behavior.
        for key, value in transcription_data.items():
            query.bindValue(f":{key}", value)
        self._exec_or_raise(query)
        return new_id

    def update_transcription_as_started(self, id: UUID):
        """Mark row `id` IN_PROGRESS and stamp time_started."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, time_started = :time_started
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.IN_PROGRESS.value)
        query.bindValue(":time_started", datetime.now().isoformat())
        self._exec_or_raise(query)

    def update_transcription_as_failed(self, id: UUID, error: str):
        """Mark row `id` FAILED with `error` and stamp time_ended."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, time_ended = :time_ended, error_message = :error_message
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.FAILED.value)
        query.bindValue(":time_ended", datetime.now().isoformat())
        query.bindValue(":error_message", error)
        self._exec_or_raise(query)

    def update_transcription_as_canceled(self, id: UUID):
        """Mark row `id` CANCELED and stamp time_ended."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, time_ended = :time_ended
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.CANCELED.value)
        query.bindValue(":time_ended", datetime.now().isoformat())
        self._exec_or_raise(query)

    def update_transcription_progress(self, id: UUID, progress: float):
        """Record `progress` for row `id`, keeping it IN_PROGRESS."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, progress = :progress
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.IN_PROGRESS.value)
        query.bindValue(":progress", progress)
        self._exec_or_raise(query)

    def update_transcription_as_completed(self, id: UUID):
        """Mark row `id` COMPLETED and stamp time_ended."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, time_ended = :time_ended
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.COMPLETED.value)
        query.bindValue(":time_ended", datetime.now().isoformat())
        self._exec_or_raise(query)

    def update_transcription_file_and_name(self, id: UUID, file_path: str, name: str | None = None):
        """Set the file path and, when given, the display name of row `id`."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET file = :file, name = COALESCE(:name, name)
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":file", file_path)
        query.bindValue(":name", name)
        self._exec_or_raise(query)

    def update_transcription_name(self, id: UUID, name: str):
        """Rename row `id`; raises if the row does not exist."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET name = :name
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":name", name)
        self._exec_or_raise(query)
        if query.numRowsAffected() == 0:
            raise Exception("Transcription not found")

    def update_transcription_notes(self, id: UUID, notes: str):
        """Set the notes of row `id`; raises if the row does not exist."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET notes = :notes
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":notes", notes)
        self._exec_or_raise(query)
        if query.numRowsAffected() == 0:
            raise Exception("Transcription not found")

    def reset_transcription_for_restart(self, id: UUID):
        """Reset a transcription to queued status for restart"""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription
            SET status = :status, progress = :progress, time_started = NULL, time_ended = NULL, error_message = NULL
            WHERE id = :id
            """
        )
        query.bindValue(":id", str(id))
        query.bindValue(":status", FileTranscriptionTask.Status.QUEUED.value)
        query.bindValue(":progress", 0.0)
        self._exec_or_raise(query)
        if query.numRowsAffected() == 0:
            raise Exception("Transcription not found")

View file

@ -1,53 +0,0 @@
from typing import List
from uuid import UUID
from PyQt6.QtSql import QSqlDatabase
from buzz.db.dao.dao import DAO
from buzz.db.entity.transcription_segment import TranscriptionSegment
class TranscriptionSegmentDAO(DAO[TranscriptionSegment]):
    """Data-access object for the `transcription_segment` table."""

    entity = TranscriptionSegment
    # `id` is excluded from INSERT binds; presumably assigned by the
    # database — see DAO.ignore_fields.
    ignore_fields = ["id"]

    def __init__(self, db: QSqlDatabase):
        super().__init__("transcription_segment", db)

    def get_segments(self, transcription_id: UUID) -> List[TranscriptionSegment]:
        """Return every segment belonging to `transcription_id`."""
        query = self._create_query()
        query.prepare(
            f"""
            SELECT * FROM {self.table}
            WHERE transcription_id = :transcription_id
            """
        )
        query.bindValue(":transcription_id", str(transcription_id))
        return self._execute_all(query)

    def delete_segments(self, transcription_id: UUID):
        """Delete all segments belonging to `transcription_id`."""
        query = self._create_query()
        query.prepare(
            f"""
            DELETE FROM {self.table}
            WHERE transcription_id = :transcription_id
            """
        )
        query.bindValue(":transcription_id", str(transcription_id))
        if not query.exec():
            raise Exception(query.lastError().text())

    def update_segment_translation(self, segment_id: int, translation: str):
        """Store `translation` on the segment with primary key `segment_id`."""
        query = self._create_query()
        query.prepare(
            """
            UPDATE transcription_segment
            SET translation = :translation
            WHERE id = :id
            """
        )
        query.bindValue(":id", segment_id)
        query.bindValue(":translation", translation)
        if not query.exec():
            raise Exception(query.lastError().text())

View file

@ -1,52 +0,0 @@
import logging
import os
import sqlite3
import tempfile
from PyQt6.QtSql import QSqlDatabase
from platformdirs import user_data_dir
from buzz.db.helpers import (
run_sqlite_migrations,
copy_transcriptions_from_json_to_sqlite,
mark_in_progress_and_queued_transcriptions_as_canceled,
)
def setup_app_db() -> QSqlDatabase:
    """Open (creating and migrating if needed) the per-user application database."""
    data_dir = user_data_dir("Buzz")
    os.makedirs(data_dir, exist_ok=True)
    db_path = os.path.join(data_dir, "Buzz.sqlite")
    return _setup_db(db_path)
def setup_test_db() -> QSqlDatabase:
    """Open a database at a fresh temporary path (for tests).

    Uses mkstemp() instead of the deprecated, race-prone tempfile.mktemp():
    the file is created atomically; its descriptor is closed so SQLite can
    open the same (empty) path itself.
    """
    fd, path = tempfile.mkstemp(suffix=".sqlite")
    os.close(fd)
    return _setup_db(path)
def _setup_db(path: str) -> QSqlDatabase:
    """Migrate the SQLite file at `path`, then open it as Qt's default connection."""
    # Run migrations
    # (stdlib sqlite3 is used for the migration phase; Qt only opens the result)
    db = sqlite3.connect(path, isolation_level=None, timeout=10.0)
    try:
        run_sqlite_migrations(db)
        copy_transcriptions_from_json_to_sqlite(db)
        mark_in_progress_and_queued_transcriptions_as_canceled(db)
        db.commit()
    finally:
        db.close()
    db = QSqlDatabase.addDatabase("QSQLITE")
    db.setDatabaseName(path)
    if not db.open():
        raise RuntimeError(f"Failed to open database connection: {db.databaseName()}")
    # SQLite does not enforce foreign keys unless enabled per connection.
    db.exec('PRAGMA foreign_keys = ON')
    logging.debug("Database connection opened: %s", db.databaseName())
    return db
def close_app_db():
    """Close Qt's default database connection if one is valid and open."""
    connection = QSqlDatabase.database()
    if connection.isValid() and connection.isOpen():
        connection.close()

View file

@ -1,12 +0,0 @@
from abc import ABC
from PyQt6.QtSql import QSqlRecord
class Entity(ABC):
    """Base class for database row objects built from Qt SQL records."""

    @classmethod
    def from_record(cls, record: QSqlRecord):
        """Build an instance by setting one attribute per record column.

        Requires `cls` to be constructible with no arguments.
        """
        entity = cls()
        for i in range(record.count()):
            setattr(entity, record.fieldName(i), record.value(i))
        return entity

View file

@ -1,66 +0,0 @@
import datetime
import os
import uuid
from dataclasses import dataclass, field
from buzz.db.entity.entity import Entity
from buzz.model_loader import ModelType
from buzz.settings.settings import Settings
from buzz.transcriber.transcriber import OutputFormat, Task, FileTranscriptionTask
@dataclass
class Transcription(Entity):
    """Row object for the `transcription` table."""

    status: str = FileTranscriptionTask.Status.QUEUED.value
    task: str = Task.TRANSCRIBE.value
    model_type: str = ModelType.WHISPER.value
    whisper_model_size: str | None = None
    hugging_face_model_id: str | None = None
    word_level_timings: str | None = None
    extract_speech: str | None = None
    language: str | None = None
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    error_message: str | None = None
    file: str | None = None
    # default_factory so each instance is stamped at creation time; a plain
    # default would be evaluated once at class-definition time, giving every
    # row the module-import timestamp.
    time_queued: str = field(
        default_factory=lambda: datetime.datetime.now().isoformat()
    )
    progress: float = 0.0
    time_ended: str | None = None
    time_started: str | None = None
    export_formats: str | None = None
    output_folder: str | None = None
    source: str | None = None
    url: str | None = None
    name: str | None = None
    notes: str | None = None

    @property
    def id_as_uuid(self):
        """The `id` column parsed as a UUID."""
        return uuid.UUID(hex=self.id)

    @property
    def status_as_status(self):
        """The `status` column parsed as a FileTranscriptionTask.Status."""
        return FileTranscriptionTask.Status(self.status)

    def get_output_file_path(
        self,
        output_format: OutputFormat,
        output_directory: str | None = None,
    ):
        """Build the export path for `output_format` from the user's filename template.

        Falls back to the source file's directory when `output_directory` is None.
        """
        input_file_name = os.path.splitext(os.path.basename(self.file))[0]
        date_time_now = datetime.datetime.now().strftime("%d-%b-%Y %H-%M-%S")
        export_file_name_template = Settings().get_default_export_file_template()
        output_file_name = (
            export_file_name_template.replace("{{ input_file_name }}", input_file_name)
            .replace("{{ task }}", self.task)
            .replace("{{ language }}", self.language or "")
            .replace("{{ model_type }}", self.model_type)
            .replace("{{ model_size }}", self.whisper_model_size or "")
            .replace("{{ date_time }}", date_time_now)
            + f".{output_format.value}"
        )
        output_directory = output_directory or os.path.dirname(self.file)
        return os.path.join(output_directory, output_file_name)

View file

@ -1,13 +0,0 @@
from dataclasses import dataclass
from buzz.db.entity.entity import Entity
@dataclass
class TranscriptionSegment(Entity):
    """Row object for the `transcription_segment` table."""

    start_time: int  # segment start offset (units set by the transcriber — confirm)
    end_time: int  # segment end offset, same units as start_time
    text: str  # transcribed text for this segment
    translation: str  # translated text; may be empty until translation runs
    transcription_id: str  # parent transcription's id (UUID string)
    id: int = -1  # -1 until persisted; real key assigned by the database

View file

@ -1,89 +0,0 @@
import os
from datetime import datetime
from sqlite3 import Connection
from buzz.assets import get_path
from buzz.cache import TasksCache
from buzz.db.migrator import dumb_migrate_db
def copy_transcriptions_from_json_to_sqlite(conn: Connection):
    """One-shot migration of the legacy JSON task cache into SQLite.

    If the old TasksCache index file exists, each cached task and its
    segments are inserted into the transcription tables.
    """
    cache = TasksCache()
    if os.path.exists(cache.tasks_list_file_path):
        tasks = cache.load()
        cursor = conn.cursor()
        for task in tasks:
            cursor.execute(
                """
                INSERT INTO transcription (id, error_message, export_formats, file, output_folder, progress, language, model_type, source, status, task, time_ended, time_queued, time_started, url, whisper_model_size, hugging_face_model_id)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, COALESCE(?, ?), ?, ?, ?, ?)
                RETURNING id;
                """,
                (
                    str(task.uid),
                    task.error,
                    ", ".join(
                        [
                            format.value
                            for format in task.file_transcription_options.output_formats
                        ]
                    ),
                    task.file_path,
                    task.output_directory,
                    task.fraction_completed,
                    task.transcription_options.language,
                    task.transcription_options.model.model_type.value,
                    task.source.value,
                    task.status.value,
                    task.transcription_options.task.value,
                    task.completed_at,
                    # COALESCE pair: use queued_at when present, else "now".
                    task.queued_at, datetime.now().isoformat(),
                    task.started_at,
                    task.url,
                    task.transcription_options.model.whisper_model_size.value
                    if task.transcription_options.model.whisper_model_size
                    else None,
                    task.transcription_options.model.hugging_face_model_id
                    if task.transcription_options.model.hugging_face_model_id
                    else None,
                ),
            )
            transcription_id = cursor.fetchone()[0]
            for segment in task.segments:
                cursor.execute(
                    """
                    INSERT INTO transcription_segment (end_time, start_time, text, translation, transcription_id)
                    VALUES (?, ?, ?, ?, ?);
                    """,
                    (
                        segment.end,
                        segment.start,
                        segment.text,
                        segment.translation,
                        transcription_id,
                    ),
                )
        # NOTE(review): the old cache file is intentionally left in place —
        # this delete was disabled, so the migration may re-run.
        # os.remove(cache.tasks_list_file_path)
        conn.commit()
def run_sqlite_migrations(db: Connection):
    """Apply the declarative schema from schema.sql to `db`."""
    with open(get_path("schema.sql")) as schema_file:
        schema_sql = schema_file.read()
    dumb_migrate_db(db=db, schema=schema_sql)
def mark_in_progress_and_queued_transcriptions_as_canceled(conn: Connection):
    """Cancel transcriptions left 'in_progress' or 'queued' (e.g. after a crash)."""
    ended_at = datetime.now().isoformat()
    cursor = conn.cursor()
    cursor.execute(
        """
        UPDATE transcription
        SET status = 'canceled', time_ended = ?
        WHERE status = 'in_progress' OR status = 'queued';
        """,
        (ended_at,),
    )
    conn.commit()

View file

@ -1,285 +0,0 @@
# coding: utf-8
# https://gist.github.com/simonw/664b4b0851c1899dc55e1fb655181037
"""Simple declarative schema migration for SQLite.
See <https://david.rothlis.net/declarative-schema-migration-for-sqlite>.
Author: William Manley <will@stb-tester.com>.
Copyright © 2019-2022 Stb-tester.com Ltd.
License: MIT.
"""
import logging
import re
import sqlite3
from textwrap import dedent
def dumb_migrate_db(db: sqlite3.Connection, schema: str, allow_deletions: bool = False) -> bool:
    """
    Migrates a database to the new schema given by the SQL text `schema`
    preserving the data. We create any table that exists in schema, delete any
    old table that is no longer used and add/remove columns and indices as
    necessary.

    Under this scheme there are a set of changes that we can make to the schema
    and this script will handle it fine:

    1. Adding a new table
    2. Adding, deleting or modifying an index
    3. Adding a column to an existing table as long as the new column can be
       NULL or has a DEFAULT value specified.
    4. Changing a column to remove NULL or DEFAULT as long as all values in the
       database are not NULL
    5. Changing the type of a column
    6. Changing the user_version

    In addition this function is capable of:

    1. Deleting tables
    2. Deleting columns from tables

    But only if allow_deletions=True. If the new schema requires a column/table
    to be deleted and allow_deletions=False this function will raise
    `RuntimeError`.

    Note: When this function is called a transaction must not be held open on
    db. A transaction will be used internally. If you wish to perform
    additional migration steps as part of a migration use DBMigrator directly.

    Any internally generated rowid columns by SQLite may change values by this
    migration.

    Returns True when at least one change was applied to the database.
    """
    with DBMigrator(db, schema, allow_deletions) as migrator:
        migrator.migrate()
    return bool(migrator.n_changes)
class DBMigrator:
    """Diffs a live SQLite database against a pristine schema and converges it.

    An in-memory "pristine" database is built from `schema`; sqlite_master of
    the two databases is compared and CREATE/DROP/rebuild statements are
    applied inside a single transaction. Use as a context manager (see
    dumb_migrate_db).
    """

    def __init__(self, db, schema, allow_deletions=False):
        self.db = db
        self.schema = schema
        self.allow_deletions = allow_deletions
        # Reference database representing the target schema; never mutated back.
        self.pristine = sqlite3.connect(":memory:")
        self.pristine.executescript(schema)
        self.n_changes = 0
        self.orig_foreign_keys = None

    def log_execute(self, msg, sql, args=None):
        """Execute `sql` on the live db, counting it as a migration change."""
        # It's important to log any changes we're making to the database for
        # forensics later
        msg_tmpl = "Database migration: %s with SQL:\n%s"
        msg_argv = (msg, _left_pad(dedent(sql)))
        if args:
            msg_tmpl += " args = %r"
            msg_argv += (args,)
        else:
            args = []
        # Uncomment this to get debugging information
        # logging.info(msg_tmpl, *msg_argv)
        self.db.execute(sql, args)
        self.n_changes += 1

    def __enter__(self):
        """Disable foreign keys (they can't change mid-transaction) and BEGIN."""
        self.orig_foreign_keys = self.db.execute("PRAGMA foreign_keys").fetchone()[0]
        if self.orig_foreign_keys:
            self.log_execute(
                "Disable foreign keys temporarily for migration",
                "PRAGMA foreign_keys = OFF",
            )
            # This doesn't count as a change because we'll undo it at the end
            self.n_changes = 0
        self.db.__enter__()
        self.db.execute("BEGIN")
        return self

    def __exit__(self, exc_type, exc_value, exc_tb):
        """Commit/rollback, restore the foreign_keys pragma, VACUUM if changed."""
        self.db.__exit__(exc_type, exc_value, exc_tb)
        if exc_value is None:
            # The SQLite docs say:
            #
            # > This pragma is a no-op within a transaction; foreign key
            # > constraint enforcement may only be enabled or disabled when
            # > there is no pending BEGIN or SAVEPOINT.
            old_changes = self.n_changes
            new_val = self._migrate_pragma("foreign_keys")
            if new_val == self.orig_foreign_keys:
                self.n_changes = old_changes
            # SQLite docs say:
            #
            # > A VACUUM will fail if there is an open transaction on the database
            # > connection that is attempting to run the VACUUM.
            if self.n_changes:
                self.db.execute("VACUUM")
        else:
            if self.orig_foreign_keys:
                self.log_execute(
                    "Re-enable foreign keys after migration", "PRAGMA foreign_keys = ON"
                )

    def migrate(self):
        """Converge tables, indices and user_version toward the pristine schema."""
        # In CI the database schema may be changing all the time. This checks
        # the current db and if it doesn't match database.sql we will
        # modify it so it does match where possible.
        pristine_tables = dict(
            self.pristine.execute(
                """\
                SELECT name, sql FROM sqlite_master
                WHERE type = \"table\" AND name != \"sqlite_sequence\""""
            ).fetchall()
        )
        pristine_indices = dict(
            self.pristine.execute(
                """\
                SELECT name, sql FROM sqlite_master
                WHERE type = \"index\""""
            ).fetchall()
        )

        tables = dict(
            self.db.execute(
                """\
                SELECT name, sql FROM sqlite_master
                WHERE type = \"table\" AND name != \"sqlite_sequence\""""
            ).fetchall()
        )

        new_tables = set(pristine_tables.keys()) - set(tables.keys())
        removed_tables = set(tables.keys()) - set(pristine_tables.keys())

        if removed_tables and not self.allow_deletions:
            raise RuntimeError(
                "Database migration: Refusing to delete tables %r" % removed_tables
            )

        # Tables whose normalised CREATE statement differs need a rebuild.
        modified_tables = set(
            name
            for name, sql in pristine_tables.items()
            if normalise_sql(tables.get(name, "")) != normalise_sql(sql)
        )

        # This PRAGMA is automatically disabled when the db is committed
        self.db.execute("PRAGMA defer_foreign_keys = TRUE")

        # New and removed tables are easy:
        for tbl_name in new_tables:
            self.log_execute("Create table %s" % tbl_name, pristine_tables[tbl_name])
        for tbl_name in removed_tables:
            self.log_execute("Drop table %s" % tbl_name, "DROP TABLE %s" % tbl_name)

        for tbl_name in modified_tables:
            # The SQLite documentation insists that we create the new table and
            # rename it over the old rather than moving the old out of the way
            # and then creating the new
            create_table_sql = pristine_tables[tbl_name]
            create_table_sql = re.sub(
                r"\b%s\b" % re.escape(tbl_name),
                tbl_name + "_migration_new",
                create_table_sql,
            )
            self.log_execute(
                "Columns change: Create table %s with updated schema" % tbl_name,
                create_table_sql,
            )

            cols = set(
                [x[1] for x in self.db.execute("PRAGMA table_info(%s)" % tbl_name)]
            )
            pristine_cols = set(
                [
                    x[1]
                    for x in self.pristine.execute("PRAGMA table_info(%s)" % tbl_name)
                ]
            )

            removed_columns = cols - pristine_cols
            if not self.allow_deletions and removed_columns:
                logging.warning(
                    "Database migration: Refusing to remove columns %r from "
                    "table %s. Current cols are %r attempting migration to %r",
                    removed_columns,
                    tbl_name,
                    cols,
                    pristine_cols,
                )
                raise RuntimeError(
                    "Database migration: Refusing to remove columns %r from "
                    "table %s" % (removed_columns, tbl_name)
                )

            logging.info("cols: %s, pristine_cols: %s", cols, pristine_cols)
            # Copy only the columns both versions share.
            self.log_execute(
                "Migrate data for table %s" % tbl_name,
                """\
                INSERT INTO {tbl_name}_migration_new ({common})
                SELECT {common} FROM {tbl_name}""".format(
                    tbl_name=tbl_name,
                    common=", ".join(cols.intersection(pristine_cols)),
                ),
            )

            # Don't need the old table any more
            self.log_execute(
                "Drop old table %s now data has been migrated" % tbl_name,
                "DROP TABLE %s" % tbl_name,
            )

            self.log_execute(
                "Columns change: Move new table %s over old" % tbl_name,
                "ALTER TABLE %s_migration_new RENAME TO %s" % (tbl_name, tbl_name),
            )

        # Migrate the indices
        indices = dict(
            self.db.execute(
                """\
                SELECT name, sql FROM sqlite_master
                WHERE type = \"index\""""
            ).fetchall()
        )
        for name in set(indices.keys()) - set(pristine_indices.keys()):
            self.log_execute(
                "Dropping obsolete index %s" % name, "DROP INDEX %s" % name
            )
        for name, sql in pristine_indices.items():
            if name not in indices:
                self.log_execute("Creating new index %s" % name, sql)
            elif sql != indices[name]:
                self.log_execute(
                    "Index %s changed: Dropping old version" % name,
                    "DROP INDEX %s" % name,
                )
                self.log_execute(
                    "Index %s changed: Creating updated version in its place" % name,
                    sql,
                )

        self._migrate_pragma("user_version")

        if self.pristine.execute("PRAGMA foreign_keys").fetchone()[0]:
            if self.db.execute("PRAGMA foreign_key_check").fetchall():
                raise RuntimeError("Database migration: Would fail foreign_key_check")

    def _migrate_pragma(self, pragma):
        """Copy an integer-valued pragma from pristine to the live db; return it."""
        pristine_val = self.pristine.execute("PRAGMA %s" % pragma).fetchone()[0]
        val = self.db.execute("PRAGMA %s" % pragma).fetchone()[0]

        if val != pristine_val:
            self.log_execute(
                "Set %s to %i from %i" % (pragma, pristine_val, val),
                "PRAGMA %s = %i" % (pragma, pristine_val),
            )

        return pristine_val
def _left_pad(text, indent=" "):
"""Maybe I can find a package in pypi for this?"""
return "\n".join(indent + line for line in text.split("\n"))
def normalise_sql(sql):
    """Normalise a SQL statement for textual comparison.

    Strips ``--`` comments, collapses whitespace runs to single spaces,
    removes spaces around parentheses and commas, and drops double-quotes
    around plain identifiers, so that semantically identical schema
    statements compare equal as strings.
    """
    # Remove comments. Matching "--[^\n]*" (instead of requiring a trailing
    # "\n") also strips a comment on the final line of input that has no
    # terminating newline; any newline left behind is collapsed below.
    sql = re.sub(r"--[^\n]*", "", sql)
    # Normalise whitespace:
    sql = re.sub(r"\s+", " ", sql)
    sql = re.sub(r" *([(),]) *", r"\1", sql)
    # Remove unnecessary quotes around bare identifiers.
    # NOTE(review): this would also unquote a quoted word inside a string
    # literal; acceptable here since the input is schema DDL, not data.
    sql = re.sub(r'"(\w+)"', r"\1", sql)
    return sql.strip()

View file

@ -1,79 +0,0 @@
from typing import List
from uuid import UUID
from buzz.db.dao.transcription_dao import TranscriptionDAO
from buzz.db.dao.transcription_segment_dao import TranscriptionSegmentDAO
from buzz.db.entity.transcription_segment import TranscriptionSegment
from buzz.transcriber.transcriber import Segment
class TranscriptionService:
    """Thin service layer over the transcription DAOs.

    Coordinates the transcription DAO (job rows) and the segment DAO
    (per-segment rows) so callers never touch the DAOs directly.
    """

    def __init__(
        self,
        transcription_dao: TranscriptionDAO,
        transcription_segment_dao: TranscriptionSegmentDAO,
    ):
        self.transcription_dao = transcription_dao
        self.transcription_segment_dao = transcription_segment_dao

    def create_transcription(self, task):
        self.transcription_dao.create_transcription(task)

    def copy_transcription(self, id: UUID) -> UUID:
        return self.transcription_dao.copy_transcription(id)

    def update_transcription_as_started(self, id: UUID):
        self.transcription_dao.update_transcription_as_started(id)

    def update_transcription_as_failed(self, id: UUID, error: str):
        self.transcription_dao.update_transcription_as_failed(id, error)

    def update_transcription_as_canceled(self, id: UUID):
        self.transcription_dao.update_transcription_as_canceled(id)

    def update_transcription_progress(self, id: UUID, progress: float):
        self.transcription_dao.update_transcription_progress(id, progress)

    def _insert_segments(self, id: UUID, segments: List[Segment]):
        # Single place for the Segment -> TranscriptionSegment row mapping;
        # previously duplicated in update_transcription_as_completed and
        # replace_transcription_segments.
        for segment in segments:
            self.transcription_segment_dao.insert(
                TranscriptionSegment(
                    start_time=segment.start,
                    end_time=segment.end,
                    text=segment.text,
                    translation='',
                    transcription_id=str(id),
                )
            )

    def update_transcription_as_completed(self, id: UUID, segments: List[Segment]):
        """Mark the job completed and store its segments."""
        self.transcription_dao.update_transcription_as_completed(id)
        self._insert_segments(id, segments)

    def update_transcription_file_and_name(self, id: UUID, file_path: str, name: str | None = None):
        self.transcription_dao.update_transcription_file_and_name(id, file_path, name)

    def update_transcription_name(self, id: UUID, name: str):
        self.transcription_dao.update_transcription_name(id, name)

    def update_transcription_notes(self, id: UUID, notes: str):
        self.transcription_dao.update_transcription_notes(id, notes)

    def reset_transcription_for_restart(self, id: UUID):
        self.transcription_dao.reset_transcription_for_restart(id)

    def replace_transcription_segments(self, id: UUID, segments: List[Segment]):
        """Replace a job's segments wholesale: delete existing, re-insert."""
        self.transcription_segment_dao.delete_segments(id)
        self._insert_segments(id, segments)

    def get_transcription_segments(self, transcription_id: UUID):
        return self.transcription_segment_dao.get_segments(transcription_id)

    def update_segment_translation(self, segment_id: int, translation: str):
        return self.transcription_segment_dao.update_segment_translation(segment_id, translation)

View file

@ -1,11 +0,0 @@
from PyQt6.QtWidgets import QWidget, QMessageBox
def show_model_download_error_dialog(parent: QWidget, error: str):
    """Show a modal error dialog for a failed Whisper model load.

    Appends a period to ``error`` if it lacks one, then adds the retry hint.
    BUGFIX: the original concatenation had no separator between the error
    sentence and the hint, producing "...failed.Please retry..."; a single
    space now separates the two sentences.
    """
    sentence_end = "" if error.endswith(".") else "."
    message = (
        parent.tr("An error occurred while loading the Whisper model")
        + f": {error}{sentence_end} "
        + parent.tr("Please retry or check the application logs for more information.")
    )
    QMessageBox.critical(parent, "", message)

View file

@ -1,282 +0,0 @@
import logging
import multiprocessing
import os
import queue
import ssl
import sys
from pathlib import Path
from typing import Optional, Tuple, List, Set
from uuid import UUID
# Fix SSL certificate verification for bundled applications (macOS, Windows)
# This must be done before importing demucs which uses torch.hub with urllib
try:
    import certifi
    # setdefault(): respect any CA-bundle paths the user already exported.
    os.environ.setdefault('REQUESTS_CA_BUNDLE', certifi.where())
    os.environ.setdefault('SSL_CERT_FILE', certifi.where())
    os.environ.setdefault('SSL_CERT_DIR', os.path.dirname(certifi.where()))
    # Also update the default SSL context for urllib
    ssl._create_default_https_context = lambda: ssl.create_default_context(cafile=certifi.where())
except ImportError:
    # certifi is optional; without it, fall back to the system cert store.
    pass
from PyQt6.QtCore import QObject, QThread, pyqtSignal, pyqtSlot, Qt
# Patch subprocess for demucs to prevent console windows on Windows
if sys.platform == "win32":
    import subprocess

    _original_run = subprocess.run
    _original_check_output = subprocess.check_output

    def _hide_console_window(kwargs):
        """Add ``startupinfo``/``creationflags`` that suppress the console
        window on Windows.

        Mutates and returns ``kwargs``; caller-supplied values win — the
        patch only fills in keys the caller did not set. Shared by both
        patched wrappers below (the original code duplicated this logic).
        """
        if 'startupinfo' not in kwargs:
            si = subprocess.STARTUPINFO()
            si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            si.wShowWindow = subprocess.SW_HIDE
            kwargs['startupinfo'] = si
        if 'creationflags' not in kwargs:
            kwargs['creationflags'] = subprocess.CREATE_NO_WINDOW
        return kwargs

    def _patched_run(*args, **kwargs):
        return _original_run(*args, **_hide_console_window(kwargs))

    def _patched_check_output(*args, **kwargs):
        return _original_check_output(*args, **_hide_console_window(kwargs))

    subprocess.run = _patched_run
    subprocess.check_output = _patched_check_output
from demucs import api as demucsApi
from buzz.locale import _
from buzz.model_loader import ModelType
from buzz.transcriber.file_transcriber import FileTranscriber
from buzz.transcriber.openai_whisper_api_file_transcriber import (
OpenAIWhisperAPIFileTranscriber,
)
from buzz.transcriber.transcriber import FileTranscriptionTask, Segment
from buzz.transcriber.whisper_file_transcriber import WhisperFileTranscriber
class FileTranscriberQueueWorker(QObject):
    """Serial worker that pulls FileTranscriptionTask items off a queue and
    runs one transcriber at a time on a dedicated QThread.

    Lives on its own thread; communicates with the UI only through the
    pyqtSignals declared below. ``trigger_run`` (a queued connection to
    ``run``) is emitted instead of calling ``run()`` directly so that
    processing always happens in this worker's thread context and never
    blocks a signal handler.
    """

    # NOTE(review): annotated as multiprocessing.Queue but instantiated as
    # queue.Queue in __init__ — this is a thread queue, not a process queue.
    tasks_queue: multiprocessing.Queue
    current_task: Optional[FileTranscriptionTask] = None
    current_transcriber: Optional[FileTranscriber] = None
    current_transcriber_thread: Optional[QThread] = None
    task_started = pyqtSignal(FileTranscriptionTask)
    task_progress = pyqtSignal(FileTranscriptionTask, float)
    task_download_progress = pyqtSignal(FileTranscriptionTask, float)
    task_completed = pyqtSignal(FileTranscriptionTask, list)
    task_error = pyqtSignal(FileTranscriptionTask, str)
    completed = pyqtSignal()
    trigger_run = pyqtSignal()

    def __init__(self, parent: Optional[QObject] = None):
        super().__init__(parent)
        self.tasks_queue = queue.Queue()
        # UIDs of tasks canceled while still queued; run() skips these.
        self.canceled_tasks: Set[UUID] = set()
        self.current_transcriber = None
        # Path of the demucs-extracted vocals file, if speech extraction ran.
        self.speech_path = None
        self.is_running = False
        # Use QueuedConnection to ensure run() is called in the correct thread context
        # and doesn't block signal handlers
        self.trigger_run.connect(self.run, Qt.ConnectionType.QueuedConnection)

    @pyqtSlot()
    def run(self):
        """Take the next task from the queue and start transcribing it.

        Blocks on tasks_queue.get(); a ``None`` sentinel shuts the worker
        down (emits ``completed``). Optionally runs demucs speech extraction
        first, then dispatches to the transcriber class matching the task's
        model type on a fresh QThread.
        """
        if self.is_running:
            return
        logging.debug("Waiting for next transcription task")
        # Clean up of previous run.
        if self.current_transcriber is not None:
            self.current_transcriber.stop()
            self.current_transcriber = None
        # Get next non-canceled task from queue
        while True:
            self.current_task: Optional[FileTranscriptionTask] = self.tasks_queue.get()
            # Stop listening when a "None" task is received
            if self.current_task is None:
                self.is_running = False
                self.completed.emit()
                return
            if self.current_task.uid in self.canceled_tasks:
                continue
            break
        # Set is_running AFTER we have a valid task to process
        self.is_running = True
        if self.current_task.transcription_options.extract_speech:
            logging.debug("Will extract speech")
            def separator_progress_callback(progress):
                # NOTE(review): both factors are scaled by 100 then int()'d;
                # effectively offset/length quantised to 1% steps — confirm
                # intended (plain division would also work).
                self.task_progress.emit(self.current_task, int(progress["segment_offset"] * 100) / int(progress["audio_length"] * 100))
            separator = None
            separated = None
            try:
                # Force CPU if specified, otherwise use CUDA if available
                force_cpu = os.getenv("BUZZ_FORCE_CPU", "false").lower() == "true"
                if force_cpu:
                    device = "cpu"
                else:
                    import torch
                    device = "cuda" if torch.cuda.is_available() else "cpu"
                separator = demucsApi.Separator(
                    device=device,
                    progress=True,
                    callback=separator_progress_callback,
                )
                _origin, separated = separator.separate_audio_file(Path(self.current_task.file_path))
                task_file_path = Path(self.current_task.file_path)
                self.speech_path = task_file_path.with_name(f"{task_file_path.stem}_speech.mp3")
                demucsApi.save_audio(separated["vocals"], self.speech_path, separator.samplerate)
                # Transcribe the extracted vocals instead of the original file.
                self.current_task.file_path = str(self.speech_path)
            except Exception as e:
                logging.error(f"Error during speech extraction: {e}", exc_info=True)
                self.task_error.emit(
                    self.current_task,
                    _("Speech extraction failed! Check your internet connection — a model may need to be downloaded."),
                )
                self.is_running = False
                return
            finally:
                # Release memory used by speech extractor
                del separator, separated
                try:
                    import torch
                    if torch.cuda.is_available():
                        torch.cuda.empty_cache()
                except Exception:
                    pass
        logging.debug("Starting next transcription task")
        self.task_progress.emit(self.current_task, 0)
        model_type = self.current_task.transcription_options.model.model_type
        if model_type == ModelType.OPEN_AI_WHISPER_API:
            self.current_transcriber = OpenAIWhisperAPIFileTranscriber(
                task=self.current_task
            )
        elif (
            model_type == ModelType.WHISPER_CPP
            or model_type == ModelType.HUGGING_FACE
            or model_type == ModelType.WHISPER
            or model_type == ModelType.FASTER_WHISPER
        ):
            self.current_transcriber = WhisperFileTranscriber(task=self.current_task)
        else:
            raise Exception(f"Unknown model type: {model_type}")
        # Run the transcriber on its own QThread; lifetime (quit/deleteLater)
        # is driven entirely by the completed/error signals below.
        self.current_transcriber_thread = QThread(self)
        self.current_transcriber.moveToThread(self.current_transcriber_thread)
        self.current_transcriber_thread.started.connect(self.current_transcriber.run)
        self.current_transcriber.completed.connect(self.current_transcriber_thread.quit)
        self.current_transcriber.error.connect(self.current_transcriber_thread.quit)
        self.current_transcriber.completed.connect(self.current_transcriber.deleteLater)
        self.current_transcriber.error.connect(self.current_transcriber.deleteLater)
        self.current_transcriber_thread.finished.connect(
            self.current_transcriber_thread.deleteLater
        )
        self.current_transcriber.progress.connect(self.on_task_progress)
        self.current_transcriber.download_progress.connect(
            self.on_task_download_progress
        )
        self.current_transcriber.error.connect(self.on_task_error)
        self.current_transcriber.completed.connect(self.on_task_completed)
        # Wait for next item on the queue
        self.current_transcriber.error.connect(lambda: self._on_task_finished())
        self.current_transcriber.completed.connect(lambda: self._on_task_finished())
        self.task_started.emit(self.current_task)
        self.current_transcriber_thread.start()

    def _on_task_finished(self):
        """Called when a task completes or errors, resets state and triggers next run"""
        self.is_running = False
        # Use signal to avoid blocking in signal handler context
        self.trigger_run.emit()

    def add_task(self, task: FileTranscriptionTask):
        """Enqueue a task, un-canceling it first if it was canceled before."""
        # Remove from canceled tasks if it was previously canceled (for restart functionality)
        if task.uid in self.canceled_tasks:
            self.canceled_tasks.remove(task.uid)
        self.tasks_queue.put(task)
        # If the worker is not currently running, trigger it to start processing
        # Use signal to avoid blocking the main thread
        if not self.is_running:
            self.trigger_run.emit()

    def cancel_task(self, task_id: UUID):
        """Mark a task canceled; if it is the one running, stop it now."""
        self.canceled_tasks.add(task_id)
        if self.current_task is not None and self.current_task.uid == task_id:
            if self.current_transcriber is not None:
                self.current_transcriber.stop()
            if self.current_transcriber_thread is not None:
                # Give the thread 5s to exit cleanly before forcing it.
                if not self.current_transcriber_thread.wait(5000):
                    logging.warning("Transcriber thread did not terminate gracefully")
                    self.current_transcriber_thread.terminate()

    def on_task_error(self, error: str):
        """Tag the current task FAILED (or CANCELED) and re-emit task_error."""
        if (
            self.current_task is not None
            and self.current_task.uid not in self.canceled_tasks
        ):
            # Check if the error indicates cancellation
            if "canceled" in error.lower() or "cancelled" in error.lower():
                self.current_task.status = FileTranscriptionTask.Status.CANCELED
                self.current_task.error = error
            else:
                self.current_task.status = FileTranscriptionTask.Status.FAILED
                self.current_task.error = error
            self.task_error.emit(self.current_task, error)

    @pyqtSlot(tuple)
    def on_task_progress(self, progress: Tuple[int, int]):
        # progress is (current, total); forward as a 0..1 fraction.
        if self.current_task is not None:
            self.task_progress.emit(self.current_task, progress[0] / progress[1])

    def on_task_download_progress(self, fraction_downloaded: float):
        if self.current_task is not None:
            self.task_download_progress.emit(self.current_task, fraction_downloaded)

    @pyqtSlot(list)
    def on_task_completed(self, segments: List[Segment]):
        """Re-emit task_completed and delete the temporary vocals file."""
        if self.current_task is not None:
            self.task_completed.emit(self.current_task, segments)
        if self.speech_path is not None:
            try:
                Path(self.speech_path).unlink()
            except Exception:
                # Best-effort cleanup; leaving the temp file behind is benign.
                pass
            self.speech_path = None

    def stop(self):
        """Shut the worker down: push the None sentinel and stop any run."""
        self.tasks_queue.put(None)
        if self.current_transcriber is not None:
            self.current_transcriber.stop()

View file

@ -1,23 +0,0 @@
import os
import logging
import gettext
from PyQt6.QtCore import QLocale
from buzz.assets import get_path
from buzz.settings.settings import APP_NAME, Settings
# Directory holding the compiled gettext catalogs shipped with the app.
locale_dir = get_path("locale")
gettext.bindtextdomain("buzz", locale_dir)
settings = Settings()
# Preferred UI language: the saved setting, falling back to the system locale.
languages = [
    settings.value(settings.Key.UI_LOCALE, QLocale().name())
]
# fallback=True: if no catalog exists for the language, gettext returns a
# NullTranslations object and _ becomes an identity function.
translate = gettext.translation(
    APP_NAME.lower(), locale_dir, languages=languages, fallback=True
)
_ = translate.gettext

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,5 +0,0 @@
import os
def file_path_as_title(file_path: str):
    """Return the file's base name, for use as a display title."""
    _, title = os.path.split(file_path)
    return title

View file

@ -1,56 +0,0 @@
from typing import Optional
import logging
import numpy as np
import sounddevice
from PyQt6.QtCore import QObject, pyqtSignal
class RecordingAmplitudeListener(QObject):
    """Listens to an audio input device and emits RMS amplitude readings.

    Signals:
        amplitude_changed: RMS of each incoming audio chunk.
        average_amplitude_changed: RMS over roughly the last
            ACCUMULATION_SECONDS of audio.
    """

    stream: Optional[sounddevice.InputStream] = None
    amplitude_changed = pyqtSignal(float)
    average_amplitude_changed = pyqtSignal(float)

    # Seconds of audio to accumulate before emitting an average reading.
    ACCUMULATION_SECONDS = 1

    def __init__(
        self,
        input_device_index: Optional[int] = None,
        parent: Optional[QObject] = None,
    ):
        super().__init__(parent)
        self.input_device_index = input_device_index
        # BUGFIX: was np.ndarray([], dtype=...), which constructs an
        # *uninitialized 0-d array* (shape ()); np.append would fold its one
        # garbage value into the accumulation buffer. np.array([], ...) is a
        # genuinely empty 1-d array.
        self.buffer = np.array([], dtype=np.float32)
        self.accumulation_size = 0
        self._active = True

    def start_recording(self):
        """Open and start the input stream; logs (no raise) on failure."""
        # BUGFIX: re-arm after a previous stop_recording(), which set
        # _active = False and would otherwise make the callback a no-op
        # forever.
        self._active = True
        try:
            self.stream = sounddevice.InputStream(
                device=self.input_device_index,
                dtype="float32",
                channels=1,
                callback=self.stream_callback,
            )
            self.stream.start()
            # The device's sample rate is only known once the stream exists.
            self.accumulation_size = int(self.stream.samplerate * self.ACCUMULATION_SECONDS)
        except Exception as e:
            self.stop_recording()
            logging.exception("Failed to start audio stream on device %s: %s", self.input_device_index, e)

    def stop_recording(self):
        """Deactivate the callback and close the stream, if open."""
        self._active = False
        if self.stream is not None:
            self.stream.stop()
            self.stream.close()

    def stream_callback(self, in_data: np.ndarray, frame_count, time_info, status):
        # Runs on the PortAudio callback thread; keep work minimal.
        if not self._active:
            return
        chunk = in_data.ravel()
        # Instantaneous RMS of this chunk.
        self.amplitude_changed.emit(float(np.sqrt(np.mean(chunk**2))))
        self.buffer = np.append(self.buffer, chunk)
        if self.buffer.size >= self.accumulation_size:
            # RMS over the accumulated window, then reset the buffer.
            self.average_amplitude_changed.emit(float(np.sqrt(np.mean(self.buffer**2))))
            self.buffer = np.array([], dtype=np.float32)

View file

@ -1,34 +0,0 @@
-- One row per transcription job (an imported file, URL, or live recording).
CREATE TABLE transcription (
    id TEXT PRIMARY KEY,
    error_message TEXT,
    export_formats TEXT,
    file TEXT,
    output_folder TEXT,
    progress DOUBLE PRECISION DEFAULT 0.0,
    language TEXT,
    model_type TEXT,
    source TEXT,
    status TEXT,
    task TEXT,
    time_ended TIMESTAMP,
    time_queued TIMESTAMP NOT NULL,
    time_started TIMESTAMP,
    url TEXT,
    whisper_model_size TEXT,
    hugging_face_model_id TEXT,
    word_level_timings BOOLEAN DEFAULT FALSE,
    extract_speech BOOLEAN DEFAULT FALSE,
    name TEXT,
    notes TEXT
);
-- One row per transcript segment; removed automatically with its parent
-- transcription via ON DELETE CASCADE.
CREATE TABLE transcription_segment (
    id INTEGER PRIMARY KEY,
    end_time INT DEFAULT 0,
    start_time INT DEFAULT 0,
    text TEXT NOT NULL,
    translation TEXT DEFAULT '',
    transcription_id TEXT,
    FOREIGN KEY (transcription_id) REFERENCES transcription(id) ON DELETE CASCADE
);
-- Speeds up per-transcription segment lookups and cascading deletes.
CREATE INDEX idx_transcription_id ON transcription_segment(transcription_id);

View file

@ -1,7 +0,0 @@
from enum import Enum
from buzz.locale import _
class RecordingTranscriberMode(Enum):
    """How newly transcribed text is added to the live-recording view.

    NOTE(review): member values are translated at import time; two members
    would silently alias if their translations ever collide in some locale
    — confirm translations stay distinct.
    """

    APPEND_BELOW = _("Append below")
    APPEND_ABOVE = _("Append above")
    APPEND_AND_CORRECT = _("Append and correct")

View file

@ -1,165 +0,0 @@
import enum
import typing
import logging
import uuid
from PyQt6.QtCore import QSettings
# QSettings organization name.
APP_NAME = "Buzz"


class Settings:
    """Typed wrapper around QSettings with an enumeration of every key.

    All persisted preferences go through this class; Key centralises the
    string identifiers so they are never scattered as literals.
    """

    def __init__(self, application=""):
        self.settings = QSettings(APP_NAME, application)
        self.settings.sync()

    class Key(enum.Enum):
        # Live-recording transcriber preferences.
        RECORDING_TRANSCRIBER_TASK = "recording-transcriber/task"
        RECORDING_TRANSCRIBER_MODEL = "recording-transcriber/model"
        RECORDING_TRANSCRIBER_LANGUAGE = "recording-transcriber/language"
        RECORDING_TRANSCRIBER_INITIAL_PROMPT = "recording-transcriber/initial-prompt"
        RECORDING_TRANSCRIBER_ENABLE_LLM_TRANSLATION = "recording-transcriber/enable-llm-translation"
        RECORDING_TRANSCRIBER_LLM_MODEL = "recording-transcriber/llm-model"
        RECORDING_TRANSCRIBER_LLM_PROMPT = "recording-transcriber/llm-prompt"
        RECORDING_TRANSCRIBER_EXPORT_ENABLED = "recording-transcriber/export-enabled"
        RECORDING_TRANSCRIBER_EXPORT_FOLDER = "recording-transcriber/export-folder"
        RECORDING_TRANSCRIBER_MODE = "recording-transcriber/mode"
        RECORDING_TRANSCRIBER_SILENCE_THRESHOLD = "recording-transcriber/silence-threshold"
        RECORDING_TRANSCRIBER_LINE_SEPARATOR = "recording-transcriber/line-separator"
        RECORDING_TRANSCRIBER_TRANSCRIPTION_STEP = "recording-transcriber/transcription-step"
        RECORDING_TRANSCRIBER_EXPORT_FILE_TYPE = "recording-transcriber/export-file-type"
        RECORDING_TRANSCRIBER_EXPORT_MAX_ENTRIES = "recording-transcriber/export-max-entries"
        RECORDING_TRANSCRIBER_EXPORT_FILE_NAME = "recording-transcriber/export-file-name"
        RECORDING_TRANSCRIBER_HIDE_UNCONFIRMED = "recording-transcriber/hide-unconfirmed"
        # Presentation (fullscreen caption) window appearance.
        PRESENTATION_WINDOW_TEXT_COLOR = "presentation-window/text-color"
        PRESENTATION_WINDOW_BACKGROUND_COLOR = "presentation-window/background-color"
        PRESENTATION_WINDOW_TEXT_SIZE = "presentation-window/text-size"
        PRESENTATION_WINDOW_THEME = "presentation-window/theme"
        # File transcriber preferences.
        FILE_TRANSCRIBER_TASK = "file-transcriber/task"
        FILE_TRANSCRIBER_MODEL = "file-transcriber/model"
        FILE_TRANSCRIBER_LANGUAGE = "file-transcriber/language"
        FILE_TRANSCRIBER_INITIAL_PROMPT = "file-transcriber/initial-prompt"
        FILE_TRANSCRIBER_ENABLE_LLM_TRANSLATION = "file-transcriber/enable-llm-translation"
        FILE_TRANSCRIBER_LLM_MODEL = "file-transcriber/llm-model"
        FILE_TRANSCRIBER_LLM_PROMPT = "file-transcriber/llm-prompt"
        FILE_TRANSCRIBER_WORD_LEVEL_TIMINGS = "file-transcriber/word-level-timings"
        FILE_TRANSCRIBER_EXPORT_FORMATS = "file-transcriber/export-formats"
        # Transcriber/model configuration shared across modes.
        DEFAULT_EXPORT_FILE_NAME = "transcriber/default-export-file-name"
        CUSTOM_OPENAI_BASE_URL = "transcriber/custom-openai-base-url"
        OPENAI_API_MODEL = "transcriber/openai-api-model"
        CUSTOM_FASTER_WHISPER_ID = "transcriber/custom-faster-whisper-id"
        HUGGINGFACE_MODEL_ID = "transcriber/huggingface-model-id"
        # Application-wide preferences.
        SHORTCUTS = "shortcuts"
        FONT_SIZE = "font-size"
        UI_LOCALE = "ui-locale"
        USER_IDENTIFIER = "user-identifier"
        # Task-table layout state.
        TRANSCRIPTION_TASKS_TABLE_COLUMN_VISIBILITY = (
            "transcription-tasks-table/column-visibility"
        )
        TRANSCRIPTION_TASKS_TABLE_COLUMN_ORDER = (
            "transcription-tasks-table/column-order"
        )
        TRANSCRIPTION_TASKS_TABLE_COLUMN_WIDTHS = (
            "transcription-tasks-table/column-widths"
        )
        TRANSCRIPTION_TASKS_TABLE_SORT_STATE = (
            "transcription-tasks-table/sort-state"
        )
        # Window geometry groups and runtime toggles.
        MAIN_WINDOW = "main-window"
        TRANSCRIPTION_VIEWER = "transcription-viewer"
        AUDIO_PLAYBACK_RATE = "audio/playback-rate"
        FORCE_CPU = "force-cpu"
        REDUCE_GPU_MEMORY = "reduce-gpu-memory"
        LAST_UPDATE_CHECK = "update/last-check"
        UPDATE_AVAILABLE_VERSION = "update/available-version"

    def get_user_identifier(self) -> str:
        """Return a stable anonymous user id, creating and persisting a
        fresh UUID4 on first use."""
        user_id = self.value(self.Key.USER_IDENTIFIER, "")
        if not user_id:
            user_id = str(uuid.uuid4())
            self.set_value(self.Key.USER_IDENTIFIER, user_id)
        return user_id

    def set_value(self, key: Key, value: typing.Any) -> None:
        self.settings.setValue(key.value, value)

    def save_custom_model_id(self, model) -> None:
        """Persist the custom Hugging Face id for the model's type.

        Silently ignores model types with no custom-id setting.
        """
        # Imported locally — presumably to avoid a circular import with
        # buzz.model_loader; confirm before moving to module level.
        from buzz.model_loader import ModelType
        match model.model_type:
            case ModelType.FASTER_WHISPER:
                self.set_value(
                    Settings.Key.CUSTOM_FASTER_WHISPER_ID,
                    model.hugging_face_model_id,
                )
            case ModelType.HUGGING_FACE:
                self.set_value(
                    Settings.Key.HUGGINGFACE_MODEL_ID,
                    model.hugging_face_model_id,
                )

    def load_custom_model_id(self, model) -> str:
        """Return the saved custom model id for the model's type, or ""."""
        # See save_custom_model_id for the local-import rationale.
        from buzz.model_loader import ModelType
        match model.model_type:
            case ModelType.FASTER_WHISPER:
                return self.value(
                    Settings.Key.CUSTOM_FASTER_WHISPER_ID,
                    "",
                )
            case ModelType.HUGGING_FACE:
                return self.value(
                    Settings.Key.HUGGINGFACE_MODEL_ID,
                    "",
                )
        return ""

    def value(
        self,
        key: Key,
        default_value: typing.Any,
        value_type: typing.Optional[type] = None,
    ) -> typing.Any:
        """Read a setting, inferring the QSettings type from the default
        when value_type is not given.

        Boolean settings are normalised explicitly because QSettings may
        return them as strings ("true"/"1"/...) or ints on some platforms.
        """
        val = self.settings.value(
            key.value,
            default_value,
            value_type if value_type is not None else type(default_value),
        )
        if (value_type is bool or isinstance(default_value, bool)):
            if isinstance(val, bool):
                return val
            if isinstance(val, str):
                return val.lower() in ("true", "1", "yes", "on")
            if isinstance(val, int):
                return val != 0
            return bool(val)
        return val

    def clear(self):
        self.settings.clear()

    def begin_group(self, group: Key) -> None:
        self.settings.beginGroup(group.value)

    def end_group(self) -> None:
        self.settings.endGroup()

    def sync(self):
        self.settings.sync()

    def get_default_export_file_template(self) -> str:
        """Return the export-file-name template (Jinja-style placeholders),
        falling back to the built-in default."""
        return self.value(
            Settings.Key.DEFAULT_EXPORT_FILE_NAME,
            "{{ input_file_name }} ({{ task }}d on {{ date_time }})",
        )

View file

@ -1,43 +0,0 @@
import enum
import typing
from buzz.locale import _
class Shortcut(str, enum.Enum):
    """Keyboard shortcut: a str-valued enum whose value is the key sequence.

    Each member carries both the key-sequence string (also the enum value,
    so members compare equal to their sequence strings) and a translated
    human-readable description.
    """

    # Declared for type checkers; assigned per-member in __new__.
    sequence: str
    description: str

    def __new__(cls, sequence: str, description: str):
        # Make the enum *value* the sequence string itself.
        obj = str.__new__(cls, sequence)
        obj._value_ = sequence
        obj.sequence = sequence
        obj.description = description
        return obj

    OPEN_RECORD_WINDOW = ("Ctrl+R", _("Open Record Window"))
    OPEN_IMPORT_WINDOW = ("Ctrl+O", _("Import File"))
    OPEN_IMPORT_URL_WINDOW = ("Ctrl+U", _("Import URL"))
    OPEN_PREFERENCES_WINDOW = ("Ctrl+,", _("Open Preferences Window"))
    VIEW_TRANSCRIPT_TEXT = ("Ctrl+E", _("View Transcript Text"))
    VIEW_TRANSCRIPT_TRANSLATION = ("Ctrl+L", _("View Transcript Translation"))
    VIEW_TRANSCRIPT_TIMESTAMPS = ("Ctrl+T", _("View Transcript Timestamps"))
    SEARCH_TRANSCRIPT = ("Ctrl+F", _("Search Transcript"))
    SEARCH_NEXT = ("Ctrl+Return", _("Go to Next Transcript Search Result"))
    SEARCH_PREVIOUS = ("Shift+Return", _("Go to Previous Transcript Search Result"))
    SCROLL_TO_CURRENT_TEXT = ("Ctrl+G", _("Scroll to Current Text"))
    PLAY_PAUSE_AUDIO = ("Ctrl+P", _("Play/Pause Audio"))
    REPLAY_CURRENT_SEGMENT = ("Ctrl+Shift+P", _("Replay Current Segment"))
    TOGGLE_PLAYBACK_CONTROLS = ("Ctrl+Alt+P", _("Toggle Playback Controls"))
    DECREASE_SEGMENT_START = ("Ctrl+Left", _("Decrease Segment Start Time"))
    INCREASE_SEGMENT_START = ("Ctrl+Right", _("Increase Segment Start Time"))
    DECREASE_SEGMENT_END = ("Ctrl+Shift+Left", _("Decrease Segment End Time"))
    INCREASE_SEGMENT_END = ("Ctrl+Shift+Right", _("Increase Segment End Time"))
    CLEAR_HISTORY = ("Ctrl+S", _("Clear History"))
    STOP_TRANSCRIPTION = ("Ctrl+X", _("Cancel Transcription"))

    @staticmethod
    def get_default_shortcuts() -> typing.Dict[str, str]:
        """Map each shortcut's enum name to its default key sequence."""
        return {shortcut.name: shortcut.sequence for shortcut in Shortcut}

Some files were not shown because too many files have changed in this diff Show more