Add benchmarks (#417)

This commit is contained in:
Chidi Williams 2023-04-25 18:27:40 +00:00 committed by GitHub
parent 158aa0b8ff
commit 84ab53d5bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 3 deletions

View file

@ -178,6 +178,79 @@ jobs:
dist/Buzz*-mac.dmg
dist/Buzz*-unix.tar.gz
# Benchmark job: runs the pytest-benchmark suite on all three OSes and
# publishes the results with github-action-benchmark.
benchmark:
  runs-on: ${{ matrix.os }}
  strategy:
    # Keep the other OS benchmarks running even if one of them fails.
    fail-fast: false
    matrix:
      include:
        - os: macos-latest
        - os: windows-latest
        - os: ubuntu-20.04
  steps:
    - uses: actions/checkout@v3
      with:
        # Pull in vendored submodules needed by the build.
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: '3.10.7'
    - name: Install Poetry Action
      uses: snok/install-poetry@v1.3.1
      with:
        virtualenvs-create: true
        # Keep .venv inside the workspace so the cache step below can capture it.
        virtualenvs-in-project: true
    - name: Load cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        # Trailing "-2" is a manual cache-busting suffix; bump it to force a rebuild.
        key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
    - name: Load cached Whisper models
      id: cached-whisper-models
      uses: actions/cache@v3
      with:
        # Model cache directories: macOS app cache, Linux whisper/huggingface
        # caches, and the Windows app cache. Non-existent paths are ignored.
        path: |
          ~/Library/Caches/Buzz
          ~/.cache/whisper
          ~/.cache/huggingface
          ~/AppData/Local/Buzz/Buzz/Cache
        key: whisper-models-${{ runner.os }}
    - uses: FedericoCarboni/setup-ffmpeg@v1
      id: setup-ffmpeg
      with:
        # Token avoids anonymous GitHub API rate limits when fetching ffmpeg.
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Install dependencies
      run: poetry config experimental.new-installer false && poetry install
    - name: Test
      # Linux runners need X11/Qt system libraries and a virtual display
      # (Xvfb on :99) before the Qt-based benchmarks can run.
      run: |
        if [ "$RUNNER_OS" == "Linux" ]; then
          sudo apt install libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xfixes0 x11-utils
          /sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1920x1200x24 -ac +extension GLX
          sudo apt update
          sudo apt install -y libpulse-mainloop-glib0 libegl1-mesa-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libportaudio2
        fi
        poetry run make benchmarks
      shell: bash
    - name: Store benchmark results
      uses: benchmark-action/github-action-benchmark@v1
      with:
        # One result series per OS.
        name: ${{ runner.os }}
        tool: 'pytest'
        # Produced by `make benchmarks` (pytest --benchmark-json benchmarks.json).
        output-file-path: benchmarks.json
        github-token: ${{ secrets.GITHUB_TOKEN }}
        comment-on-alert: true
        summary-always: true
        # Push results to the benchmark data branch automatically.
        auto-push: true
release:
runs-on: ${{ matrix.os }}
strategy:

2
.gitignore vendored
View file

@ -18,3 +18,5 @@ whisper_cpp.py
# Internationalization - compiled binaries
locale/**/*.mo
benchmarks.json

View file

@ -38,7 +38,10 @@ clean:
rm -rf dist/* || true
test: buzz/whisper_cpp.py translation_mo
pytest -vv --cov=buzz --cov-report=xml --cov-report=html
pytest -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip
benchmarks: buzz/whisper_cpp.py translation_mo
pytest -vv --benchmark-only --benchmark-json benchmarks.json
dist/Buzz dist/Buzz.app: buzz/whisper_cpp.py translation_mo
pyinstaller --noconfirm Buzz.spec

View file

@ -1371,7 +1371,7 @@ class MenuBar(QMenuBar):
self.import_action.triggered.connect(
self.on_import_action_triggered)
about_action = QAction(f'{_("About...")} {APP_NAME}', self)
about_action = QAction(f'{_("About")} {APP_NAME}', self)
about_action.triggered.connect(self.on_about_action_triggered)
self.preferences_action = QAction(_("Preferences..."), self)

35
poetry.lock generated
View file

@ -1367,6 +1367,18 @@ files = [
{file = "protobuf-4.22.1.tar.gz", hash = "sha256:dce7a55d501c31ecf688adb2f6c3f763cf11bc0be815d1946a84d74772ab07a7"},
]
[[package]]
name = "py-cpuinfo"
version = "9.0.0"
description = "Get CPU info with pure Python"
category = "dev"
optional = false
python-versions = "*"
files = [
{file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"},
{file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"},
]
[[package]]
name = "pycodestyle"
version = "2.10.0"
@ -1558,6 +1570,27 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "pytest-benchmark"
version = "4.0.0"
description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-benchmark-4.0.0.tar.gz", hash = "sha256:fb0785b83efe599a6a956361c0691ae1dbb5318018561af10f3e915caa0048d1"},
{file = "pytest_benchmark-4.0.0-py3-none-any.whl", hash = "sha256:fdb7db64e31c8b277dff9850d2a2556d8b60bcb0ea6524e36e28ffd7c87f71d6"},
]
[package.dependencies]
py-cpuinfo = "*"
pytest = ">=3.8"
[package.extras]
aspect = ["aspectlib"]
elasticsearch = ["elasticsearch"]
histogram = ["pygal", "pygaljs"]
[[package]]
name = "pytest-cov"
version = "4.0.0"
@ -2369,4 +2402,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.13,<3.11"
content-hash = "10dd56bee87a09ccb6c7abf32fcda50d7379a6c5d5e87c59ca4aed48cd648d86"
content-hash = "5c620ecf99cf19a1780a50728c1f5c93a066104bf923d7f98a7a54d568e44a3f"

View file

@ -32,6 +32,7 @@ pytest-xvfb = "^2.0.0"
pylint = "^2.15.5"
pre-commit = "^2.20.0"
ctypesgen = "^1.1.1"
pytest-benchmark = "^4.0.0"
[build-system]
requires = ["poetry-core"]

BIN
testdata/audio-long.mp3 vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,54 @@
import platform
from unittest.mock import Mock
import pytest
from buzz.model_loader import WhisperModelSize, ModelType, TranscriptionModel
from buzz.transcriber import (FileTranscriptionOptions, FileTranscriptionTask, Task, WhisperCppFileTranscriber,
TranscriptionOptions, WhisperFileTranscriber, FileTranscriber)
from tests.model_loader import get_model_path
def get_task(model: TranscriptionModel) -> FileTranscriptionTask:
    """Build a FileTranscriptionTask for benchmarking the given model.

    Resolves the model's local path via ``get_model_path`` and wraps it,
    together with French-transcription options, into a task object.

    NOTE(review): ``file_transcription_options`` points at
    ``testdata/whisper-french.mp3`` while the task's ``file_path`` is
    ``testdata/audio-long.mp3`` — looks inconsistent; confirm which path
    the transcribers actually read.
    """
    options = TranscriptionOptions(
        language='fr',
        task=Task.TRANSCRIBE,
        word_level_timings=False,
        model=model,
    )
    file_options = FileTranscriptionOptions(
        file_paths=['testdata/whisper-french.mp3'])
    return FileTranscriptionTask(
        file_path='testdata/audio-long.mp3',
        transcription_options=options,
        file_transcription_options=file_options,
        model_path=get_model_path(options.model),
    )
def transcribe(qtbot, transcriber: FileTranscriber):
    """Run ``transcriber`` to completion and return the emitted segments.

    Connects a mock slot to the ``completed`` signal, blocks until the
    signal fires (up to ten minutes), and returns the first positional
    argument the signal was emitted with.
    """
    on_completed = Mock()
    transcriber.completed.connect(on_completed)
    ten_minutes_ms = 10 * 60 * 1000
    with qtbot.waitSignal(transcriber.completed, timeout=ten_minutes_ms):
        transcriber.run()
    args, _kwargs = on_completed.call_args
    return args[0]
@pytest.mark.parametrize(
    'transcriber',
    [
        # NOTE: these transcriber instances are constructed at collection
        # time, so get_task() (and its model-path lookup) runs on import.
        pytest.param(
            WhisperCppFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY)))),
            id="Whisper.cpp - Tiny"),
        pytest.param(
            WhisperFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
            id="Whisper - Tiny"),
        # NOTE(review): the Faster Whisper case also uses WhisperFileTranscriber —
        # presumably it dispatches on the task's model_type; confirm.
        pytest.param(
            WhisperFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.FASTER_WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
            id="Faster Whisper - Tiny",
            # Skipped on macOS: libiomp5 double-initialization error on the
            # GitHub Actions runner (see linked run for details).
            marks=pytest.mark.skipif(platform.system() == 'Darwin',
                                     reason='Error with libiomp5 already initialized on GH action runner: https://github.com/chidiwilliams/buzz/actions/runs/4657331262/jobs/8241832087')
        ),
    ])
def test_should_transcribe_and_benchmark(qtbot, benchmark, transcriber):
    """Benchmark a full transcription run and sanity-check its output."""
    # The pytest-benchmark `benchmark` fixture calls transcribe(qtbot,
    # transcriber) repeatedly, records timings, and returns its result.
    segments = benchmark(transcribe, qtbot, transcriber)
    assert len(segments) > 0