Add benchmarks (#417)

This commit is contained in:
Chidi Williams 2023-04-25 18:27:40 +00:00 committed by GitHub
parent 158aa0b8ff
commit 84ab53d5bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 169 additions and 3 deletions

View file

@ -178,6 +178,79 @@ jobs:
dist/Buzz*-mac.dmg
dist/Buzz*-unix.tar.gz
# Benchmark job: runs the pytest-benchmark suite on all three OSes and
# publishes the results with github-action-benchmark.
benchmark:
  runs-on: ${{ matrix.os }}
  strategy:
    # Keep the other OS benchmarks running even if one of them fails.
    fail-fast: false
    matrix:
      include:
        - os: macos-latest
        - os: windows-latest
        - os: ubuntu-20.04
  steps:
    - uses: actions/checkout@v3
      with:
        # Pull in vendored submodules needed by the build.
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        python-version: '3.10.7'
    - name: Install Poetry Action
      uses: snok/install-poetry@v1.3.1
      with:
        virtualenvs-create: true
        # Keep .venv inside the workspace so the cache step below can capture it.
        virtualenvs-in-project: true
    - name: Load cached venv
      id: cached-poetry-dependencies
      uses: actions/cache@v3
      with:
        path: .venv
        # Trailing "-2" is a manual cache-busting suffix; bump it to force a rebuild.
        key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }}-2
    - name: Load cached Whisper models
      id: cached-whisper-models
      uses: actions/cache@v3
      with:
        # Model cache directories: macOS app cache, Linux whisper/huggingface
        # caches, and the Windows app cache. Non-existent paths are ignored.
        path: |
          ~/Library/Caches/Buzz
          ~/.cache/whisper
          ~/.cache/huggingface
          ~/AppData/Local/Buzz/Buzz/Cache
        key: whisper-models-${{ runner.os }}
    - uses: FedericoCarboni/setup-ffmpeg@v1
      id: setup-ffmpeg
      with:
        # Token avoids anonymous GitHub API rate limits when fetching ffmpeg.
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Install dependencies
      run: poetry config experimental.new-installer false && poetry install
    - name: Test
      # Linux runners need X11/Qt system libraries and a virtual display
      # (Xvfb on :99) before the Qt-based benchmarks can run.
      run: |
        if [ "$RUNNER_OS" == "Linux" ]; then
          sudo apt install libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xfixes0 x11-utils
          /sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1920x1200x24 -ac +extension GLX
          sudo apt update
          sudo apt install -y libpulse-mainloop-glib0 libegl1-mesa-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libportaudio2
        fi
        poetry run make benchmarks
      shell: bash
    - name: Store benchmark results
      uses: benchmark-action/github-action-benchmark@v1
      with:
        # One result series per OS.
        name: ${{ runner.os }}
        tool: 'pytest'
        # Produced by `make benchmarks` (pytest --benchmark-json benchmarks.json).
        output-file-path: benchmarks.json
        github-token: ${{ secrets.GITHUB_TOKEN }}
        comment-on-alert: true
        summary-always: true
        # Push results to the benchmark data branch automatically.
        auto-push: true
release:
runs-on: ${{ matrix.os }}
strategy:

2
.gitignore vendored
View file

@ -18,3 +18,5 @@ whisper_cpp.py
# Internationalization - compiled binaries
locale/**/*.mo
benchmarks.json

View file

@ -38,7 +38,10 @@ clean:
rm -rf dist/* || true
test: buzz/whisper_cpp.py translation_mo
pytest -vv --cov=buzz --cov-report=xml --cov-report=html
pytest -vv --cov=buzz --cov-report=xml --cov-report=html --benchmark-skip
benchmarks: buzz/whisper_cpp.py translation_mo
pytest -vv --benchmark-only --benchmark-json benchmarks.json
dist/Buzz dist/Buzz.app: buzz/whisper_cpp.py translation_mo
pyinstaller --noconfirm Buzz.spec

View file

@ -1371,7 +1371,7 @@ class MenuBar(QMenuBar):
self.import_action.triggered.connect(
self.on_import_action_triggered)
about_action = QAction(f'{_("About...")} {APP_NAME}', self)
about_action = QAction(f'{_("About")} {APP_NAME}', self)
about_action.triggered.connect(self.on_about_action_triggered)
self.preferences_action = QAction(_("Preferences..."), self)

35
poetry.lock generated
View file

@ -1367,6 +1367,18 @@ files = [
{file = "protobuf-4.22.1.tar.gz", hash = "sha256:dce7a55d501c31ecf688adb2f6c3f763cf11bc0be815d1946a84d74772ab07a7"},
]
[[package]]
name = "py-cpuinfo"
version = "9.0.0"
description = "Get CPU info with pure Python"
category = "dev"
optional = false
python-versions = "*"
files = [
{file = "py-cpuinfo-9.0.0.tar.gz", hash = "sha256:3cdbbf3fac90dc6f118bfd64384f309edeadd902d7c8fb17f02ffa1fc3f49690"},
{file = "py_cpuinfo-9.0.0-py3-none-any.whl", hash = "sha256:859625bc251f64e21f077d099d4162689c762b5d6a4c3c97553d56241c9674d5"},
]
[[package]]
name = "pycodestyle"
version = "2.10.0"
@ -1558,6 +1570,27 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
[package.extras]
testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
[[package]]
name = "pytest-benchmark"
version = "4.0.0"
description = "A ``pytest`` fixture for benchmarking code. It will group the tests into rounds that are calibrated to the chosen timer."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-benchmark-4.0.0.tar.gz", hash = "sha256:fb0785b83efe599a6a956361c0691ae1dbb5318018561af10f3e915caa0048d1"},
{file = "pytest_benchmark-4.0.0-py3-none-any.whl", hash = "sha256:fdb7db64e31c8b277dff9850d2a2556d8b60bcb0ea6524e36e28ffd7c87f71d6"},
]
[package.dependencies]
py-cpuinfo = "*"
pytest = ">=3.8"
[package.extras]
aspect = ["aspectlib"]
elasticsearch = ["elasticsearch"]
histogram = ["pygal", "pygaljs"]
[[package]]
name = "pytest-cov"
version = "4.0.0"
@ -2369,4 +2402,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
[metadata]
lock-version = "2.0"
python-versions = ">=3.9.13,<3.11"
content-hash = "10dd56bee87a09ccb6c7abf32fcda50d7379a6c5d5e87c59ca4aed48cd648d86"
content-hash = "5c620ecf99cf19a1780a50728c1f5c93a066104bf923d7f98a7a54d568e44a3f"

View file

@ -32,6 +32,7 @@ pytest-xvfb = "^2.0.0"
pylint = "^2.15.5"
pre-commit = "^2.20.0"
ctypesgen = "^1.1.1"
pytest-benchmark = "^4.0.0"
[build-system]
requires = ["poetry-core"]

BIN
testdata/audio-long.mp3 vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,54 @@
import platform
from unittest.mock import Mock
import pytest
from buzz.model_loader import WhisperModelSize, ModelType, TranscriptionModel
from buzz.transcriber import (FileTranscriptionOptions, FileTranscriptionTask, Task, WhisperCppFileTranscriber,
TranscriptionOptions, WhisperFileTranscriber, FileTranscriber)
from tests.model_loader import get_model_path
def get_task(model: TranscriptionModel) -> FileTranscriptionTask:
    """Build a FileTranscriptionTask for benchmarking the given model.

    Resolves the model's local path via ``get_model_path`` and wraps it,
    together with French-transcription options, into a task object.

    NOTE(review): ``file_transcription_options`` points at
    ``testdata/whisper-french.mp3`` while the task's ``file_path`` is
    ``testdata/audio-long.mp3`` — looks inconsistent; confirm which path
    the transcribers actually read.
    """
    options = TranscriptionOptions(
        language='fr',
        task=Task.TRANSCRIBE,
        word_level_timings=False,
        model=model,
    )
    file_options = FileTranscriptionOptions(
        file_paths=['testdata/whisper-french.mp3'])
    return FileTranscriptionTask(
        file_path='testdata/audio-long.mp3',
        transcription_options=options,
        file_transcription_options=file_options,
        model_path=get_model_path(options.model),
    )
def transcribe(qtbot, transcriber: FileTranscriber):
    """Run ``transcriber`` to completion and return the emitted segments.

    Connects a mock slot to the ``completed`` signal, blocks until the
    signal fires (up to ten minutes), and returns the first positional
    argument the signal was emitted with.
    """
    on_completed = Mock()
    transcriber.completed.connect(on_completed)
    ten_minutes_ms = 10 * 60 * 1000
    with qtbot.waitSignal(transcriber.completed, timeout=ten_minutes_ms):
        transcriber.run()
    args, _kwargs = on_completed.call_args
    return args[0]
@pytest.mark.parametrize(
    'transcriber',
    [
        # NOTE: these transcriber instances are constructed at collection
        # time, so get_task() (and its model-path lookup) runs on import.
        pytest.param(
            WhisperCppFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.WHISPER_CPP, whisper_model_size=WhisperModelSize.TINY)))),
            id="Whisper.cpp - Tiny"),
        pytest.param(
            WhisperFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
            id="Whisper - Tiny"),
        # NOTE(review): the Faster Whisper case also uses WhisperFileTranscriber —
        # presumably it dispatches on the task's model_type; confirm.
        pytest.param(
            WhisperFileTranscriber(task=(get_task(
                TranscriptionModel(model_type=ModelType.FASTER_WHISPER, whisper_model_size=WhisperModelSize.TINY)))),
            id="Faster Whisper - Tiny",
            # Skipped on macOS: libiomp5 double-initialization error on the
            # GitHub Actions runner (see linked run for details).
            marks=pytest.mark.skipif(platform.system() == 'Darwin',
                                     reason='Error with libiomp5 already initialized on GH action runner: https://github.com/chidiwilliams/buzz/actions/runs/4657331262/jobs/8241832087')
        ),
    ])
def test_should_transcribe_and_benchmark(qtbot, benchmark, transcriber):
    """Benchmark a full transcription run and sanity-check its output."""
    # The pytest-benchmark `benchmark` fixture calls transcribe(qtbot,
    # transcriber) repeatedly, records timings, and returns its result.
    segments = benchmark(transcribe, qtbot, transcriber)
    assert len(segments) > 0