Add speed control (#1224)

Co-authored-by: Raivis Dejus <orvils@gmail.com>
2026-03-14 14:45:46 +01:00 · 2025-09-03 03:38:46 -05:00 · 2025-09-03 03:38:46 -05:00 · d285e6e43d
commit d285e6e43d
parent ad176beb47
11 changed files with 2394 additions and 48 deletions
--- a/buzz/settings/settings.py
+++ b/buzz/settings/settings.py
@ -56,6 +56,9 @@ class Settings:
        )

        MAIN_WINDOW = "main-window"
+        TRANSCRIPTION_VIEWER = "transcription-viewer"
+
+        AUDIO_PLAYBACK_RATE = "audio/playback-rate"

        FORCE_CPU = "force-cpu"

@ -100,16 +103,25 @@ class Settings:
        return ""

    def value(
-        self,
-        key: Key,
-        default_value: typing.Any,
-        value_type: typing.Optional[type] = None,
+            self,
+            key: Key,
+            default_value: typing.Any,
+            value_type: typing.Optional[type] = None,
    ) -> typing.Any:
-        return self.settings.value(
+        val = self.settings.value(
            key.value,
            default_value,
            value_type if value_type is not None else type(default_value),
        )
+        if (value_type is bool or isinstance(default_value, bool)):
+            if isinstance(val, bool):
+                return val
+            if isinstance(val, str):
+                return val.lower() in ("true", "1", "yes", "on")
+            if isinstance(val, int):
+                return val != 0
+            return bool(val)
+        return val

    def clear(self):
        self.settings.clear()
--- a/buzz/settings/shortcut.py
+++ b/buzz/settings/shortcut.py
@ -22,6 +22,9 @@ class Shortcut(str, enum.Enum):
    VIEW_TRANSCRIPT_TEXT = ("Ctrl+E", _("View Transcript Text"))
    VIEW_TRANSCRIPT_TRANSLATION = ("Ctrl+L", _("View Transcript Translation"))
    VIEW_TRANSCRIPT_TIMESTAMPS = ("Ctrl+T", _("View Transcript Timestamps"))
+    SEARCH_TRANSCRIPT = ("Ctrl+F", _("Search Transcript"))
+    SCROLL_TO_CURRENT_TEXT = ("Ctrl+G", _("Scroll to Current Text"))
+    TOGGLE_PLAYBACK_CONTROLS = ("Ctrl+P", _("Toggle Playback Controls"))

    CLEAR_HISTORY = ("Ctrl+S", _("Clear History"))
    STOP_TRANSCRIPTION = ("Ctrl+X", _("Cancel Transcription"))
--- a/buzz/widgets/audio_player.py
+++ b/buzz/widgets/audio_player.py
@ -1,3 +1,4 @@
+import logging
 from typing import Tuple, Optional

 from PyQt6 import QtGui
@ -6,6 +7,7 @@ from PyQt6.QtMultimedia import QAudioOutput, QMediaPlayer
 from PyQt6.QtWidgets import QWidget, QSlider, QPushButton, QLabel, QHBoxLayout

 from buzz.widgets.icon import PlayIcon, PauseIcon
+from buzz.settings.settings import Settings


 class AudioPlayer(QWidget):
@ -18,6 +20,10 @@ class AudioPlayer(QWidget):
        self.position_ms = 0
        self.duration_ms = 0
        self.invalid_media = None
+        self.is_looping = False  # Flag to prevent recursive position changes
+
+        # Initialize settings
+        self.settings = Settings()

        self.audio_output = QAudioOutput()
        self.audio_output.setVolume(100)
@ -26,6 +32,11 @@ class AudioPlayer(QWidget):
        self.media_player.setSource(QUrl.fromLocalFile(file_path))
        self.media_player.setAudioOutput(self.audio_output)

+        # Speed controls live in the transcription viewer; here we only apply the saved playback rate (default 1.0)
+        saved_rate = self.settings.value(Settings.Key.AUDIO_PLAYBACK_RATE, 1.0, float)
+        saved_rate = max(0.1, min(5.0, saved_rate))  # Ensure valid range
+        self.media_player.setPlaybackRate(saved_rate)
+
        self.scrubber = QSlider(Qt.Orientation.Horizontal)
        self.scrubber.setRange(0, 0)
        self.scrubber.sliderMoved.connect(self.on_slider_moved)
@ -36,16 +47,19 @@ class AudioPlayer(QWidget):
        self.play_button = QPushButton("")
        self.play_button.setIcon(self.play_icon)
        self.play_button.clicked.connect(self.toggle_play)
+        self.play_button.setMaximumWidth(40)  # Match other button widths
+        self.play_button.setMinimumHeight(30)  # Match other button heights

        self.time_label = QLabel()
        self.time_label.setAlignment(Qt.AlignmentFlag.AlignRight)

-        layout = QHBoxLayout()
-        layout.addWidget(self.play_button, alignment=Qt.AlignmentFlag.AlignVCenter)
-        layout.addWidget(self.scrubber, alignment=Qt.AlignmentFlag.AlignVCenter)
-        layout.addWidget(self.time_label, alignment=Qt.AlignmentFlag.AlignVCenter)
+        # Create main layout - simplified without speed controls
+        main_layout = QHBoxLayout()
+        main_layout.addWidget(self.play_button, alignment=Qt.AlignmentFlag.AlignVCenter)
+        main_layout.addWidget(self.scrubber, alignment=Qt.AlignmentFlag.AlignVCenter)
+        main_layout.addWidget(self.time_label, alignment=Qt.AlignmentFlag.AlignVCenter)

-        self.setLayout(layout)
+        self.setLayout(main_layout)

        # Connect media player signals to the corresponding slots
        self.media_player.durationChanged.connect(self.on_duration_changed)
@ -68,10 +82,15 @@ class AudioPlayer(QWidget):

        # If a range has been selected as we've reached the end of the range,
        # loop back to the start of the range
-        if self.range_ms is not None:
+        if self.range_ms is not None and not self.is_looping:
            start_range_ms, end_range_ms = self.range_ms
-            if position_ms > end_range_ms:
+            # Check if we're at or past the end of the range (with small buffer for precision)
+            if position_ms >= (end_range_ms - 50):  # Within 50ms of end
+                logging.debug(f"🔄 LOOP: Reached end {end_range_ms}ms, jumping to start {start_range_ms}ms")
+                self.is_looping = True  # Set flag to prevent recursion
                self.set_position(start_range_ms)
+                # Reset flag immediately after setting position
+                self.is_looping = False

    def on_playback_state_changed(self, state: QMediaPlayer.PlaybackState):
        if state == QMediaPlayer.PlaybackState.PlayingState:
@ -93,6 +112,10 @@ class AudioPlayer(QWidget):
            self.scrubber.setRange(0, 1)
            self.scrubber.setDisabled(True)
            self.time_label.setDisabled(True)
+        else:
+            self.play_button.setEnabled(True)
+            self.scrubber.setEnabled(True)
+            self.time_label.setEnabled(True)

    def toggle_play(self):
        if self.media_player.playbackState() == QMediaPlayer.PlaybackState.PlayingState:
@ -101,13 +124,31 @@ class AudioPlayer(QWidget):
            self.media_player.play()

    def set_range(self, range_ms: Tuple[int, int]):
+        """Set a loop range. Only jump to start if current position is outside the range."""
        self.range_ms = range_ms
-        self.set_position(range_ms[0])
+        start_range_ms, end_range_ms = range_ms
+        
+        # Only jump to start if current position is outside the range
+        if self.position_ms < start_range_ms or self.position_ms > end_range_ms:
+            logging.debug(f"🔄 LOOP: Position {self.position_ms}ms outside range, jumping to {start_range_ms}ms")
+            self.set_position(start_range_ms)
+
+    def clear_range(self):
+        """Clear the current loop range"""
+        self.range_ms = None
+
+    def _reset_looping_flag(self):
+        """Reset the looping flag"""
+        self.is_looping = False

    def on_slider_moved(self, position_ms: int):
        self.set_position(position_ms)
-        # Reset range if slider is scrubbed manually
-        self.range_ms = None
+        # Only clear range if scrubbed significantly outside the current range
+        if self.range_ms is not None:
+            start_range_ms, end_range_ms = self.range_ms
+            # Clear range if scrubbed more than 2 seconds outside the range
+            if position_ms < (start_range_ms - 2000) or position_ms > (end_range_ms + 2000):
+                self.range_ms = None

    def set_position(self, position_ms: int):
        self.media_player.setPosition(position_ms)
--- a/buzz/widgets/icon.py
+++ b/buzz/widgets/icon.py
@ -89,6 +89,13 @@ class VisibilityIcon(Icon):
        )


+class ScrollToCurrentIcon(Icon):
+    def __init__(self, parent: QWidget):
+        super().__init__(
+            get_path("assets/visibility_FILL0_wght700_GRAD0_opsz48.svg"), parent
+        )
+
+
 BUZZ_ICON_PATH = get_path("assets/buzz.ico")
 BUZZ_LARGE_ICON_PATH = get_path("assets/buzz-icon-1024.png")

--- a/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py
+++ b/buzz/widgets/transcription_viewer/transcription_segments_editor_widget.py
@ -182,3 +182,12 @@ class TranscriptionSegmentsEditorWidget(QTableView):

    def segments(self) -> list[QSqlRecord]:
        return [self.model().record(i) for i in range(self.model().rowCount())]
+
+    def highlight_and_scroll_to_row(self, row_index: int):
+        """Highlight a specific row and scroll it into view"""
+        if 0 <= row_index < self.model().rowCount():
+            # Select the row
+            self.selectRow(row_index)
+            # Scroll to the row with better positioning
+            model_index = self.model().index(row_index, 0)
+            self.scrollTo(model_index, QAbstractItemView.ScrollHint.PositionAtCenter)
--- a/buzz/widgets/transcription_viewer/transcription_viewer_widget.py
+++ b/buzz/widgets/transcription_viewer/transcription_viewer_widget.py
--- a/docs/docs/index.md
+++ b/docs/docs/index.md
@ -21,9 +21,24 @@ OpenAI's [Whisper](https://github.com/openai/whisper).
  VTT ([Demo](https://www.loom.com/share/cf263b099ac3481082bb56d19b7c87fe))
 - Transcription and translation from your computer's microphones to text (Resource-intensive and may not be
  real-time, [Demo](https://www.loom.com/share/564b753eb4d44b55b985b8abd26b55f7))
+- **Advanced Transcription Viewer** with search, playback controls, and speed adjustment
+- **Smart Interface** with conditional visibility and state persistence
+- **Professional Controls** including loop segments, follow audio, and keyboard shortcuts
 - Supports [Whisper](https://github.com/openai/whisper#available-models-and-languages),
  [Whisper.cpp](https://github.com/ggerganov/whisper.cpp), [Faster Whisper](https://github.com/guillaumekln/faster-whisper),
  [Whisper-compatible Hugging Face models](https://huggingface.co/models?other=whisper), and
  the [OpenAI Whisper API](https://platform.openai.com/docs/api-reference/introduction)
 - [Command-Line Interface](#command-line-interface)
 - Available on Mac, Windows, and Linux
+
+## Transcription Viewer Interface
+
+Buzz features a powerful transcription viewer that makes it easy to work with your transcriptions:
+
+- **🔍 Smart Search**: Find text quickly with real-time search and navigation
+- **🎵 Playback Controls**: Loop segments, follow audio, and adjust playback speed
+- **⌨️ Keyboard Shortcuts**: Efficient navigation with Ctrl+F, Ctrl+L, and more
+- **🎨 Clean Interface**: Conditional visibility keeps the interface uncluttered
+- **💾 State Persistence**: Remembers your preferences between sessions
+
+[Learn more about the Transcription Viewer Interface →](usage/5_transcription_viewer)
--- a/docs/docs/usage/5_transcription_viewer.md
+++ b/docs/docs/usage/5_transcription_viewer.md
@ -0,0 +1,118 @@
+# Transcription Viewer Interface
+
+The Buzz transcription viewer provides a powerful interface for reviewing, editing, and navigating through your transcriptions. This guide covers all the features available in the transcription viewer.
+
+## Overview
+
+The transcription viewer is organized into several key sections:
+
+- **Top Toolbar**: Contains the view mode, export, translate, resize, playback controls, find, and scroll-to-current buttons
+- **Search Bar**: Find and navigate through transcript text
+- **Transcription Segments**: Table view of all transcription segments with timestamps
+- **Playback Controls**: Audio playback settings and speed controls (since version 1.3.0)
+- **Audio Player**: Standard media player with progress bar
+- **Current Segment Display**: Shows the currently selected or playing segment
+
+## Top Toolbar
+
+### View Mode Button
+- **Function**: Switch between different viewing modes
+- **Options**:
+  - **Timestamps**: Shows segments in a table format with start/end times
+  - **Text**: Shows combined text without timestamps
+  - **Translation**: Shows translated text (if available)
+
+### Export Button
+- **Function**: Export transcription in various formats
+- **Formats**: SRT, VTT, TXT, JSON, and more
+- **Usage**: Click to open export menu and select desired format
+
+### Translate Button
+- **Function**: Translate transcription to different languages
+- **Usage**: Click to open translation settings and start translation
+
+### Resize Button
+- **Function**: Adjust transcription segment boundaries
+- **Usage**: Click to open resize dialog for fine-tuning timestamps
+- **More information**: See the [Edit and Resize](https://chidiwilliams.github.io/buzz/docs/usage/edit_and_resize) section
+
+### Playback Controls Button
+(since version 1.3.0)
+- **Function**: Show/hide playback control panel
+- **Shortcut**: `Ctrl+P` (Windows/Linux) or `Cmd+P` (macOS)
+- **Behavior**: Toggle button that shows/hides the playback controls below
+
+### Find Button
+(since version 1.3.0)
+- **Function**: Show/hide search functionality
+- **Shortcut**: `Ctrl+F` (Windows/Linux) or `Cmd+F` (macOS)
+- **Behavior**: Toggle button that shows/hides the search bar
+
+### Scroll to Current Button
+(since version 1.3.0)
+- **Function**: Scroll the transcript to the text that is currently playing
+- **Shortcut**: `Ctrl+G` (Windows/Linux) or `Cmd+G` (macOS)
+- **Usage**: Click to jump to the current audio position in the transcript
+
+## Search Functionality
+(since version 1.3.0)
+
+### Search Bar
+The search bar appears below the toolbar when activated and provides:
+
+- **Search Input**: Type text to find in the transcription (wider input field for better usability)
+- **Navigation**: Up/down arrows to move between matches
+- **Status**: Shows current match position and total matches (e.g., "3 of 15 matches")
+- **Clear**: Remove search text and results (larger button for better accessibility)
+- **Results**: Displays found text with context
+- **Consistent Button Sizing**: All navigation buttons have uniform height for better visual consistency
+
+### Search Shortcuts
+- **`Ctrl+F` / `Cmd+F`**: Toggle search bar on/off
+- **`Enter`**: Find next match
+- **`Shift+Enter`**: Find previous match
+- **`Escape`**: Close search bar
+
+### Search Features
+- **Real-time Search**: Results update as you type
+- **Case-insensitive**: Finds matches regardless of capitalization
+- **Word Boundaries**: Respects word boundaries for accurate matching
+- **Cross-view Search**: Works in all view modes (Timestamps, Text, Translation)
+
+## Playback Controls
+(since version 1.3.0)
+
+### Loop Segment
+- **Function**: Automatically loop playback of selected segments
+- **Usage**: Check the "Loop Segment" checkbox
+- **Behavior**: When enabled, clicking on a transcript segment will set a loop range
+- **Visual Feedback**: Loop range is highlighted in the audio player
+
+### Follow Audio
+- **Function**: Automatically scroll to current audio position
+- **Usage**: Check the "Follow Audio" checkbox
+- **Behavior**: Transcript automatically follows the audio playback
+- **Benefits**: Easy to follow along with long audio files
+
+### Speed Controls
+- **Function**: Adjust audio playback speed
+- **Range**: 0.5x to 2.0x speed
+- **Controls**:
+  - **Speed Dropdown**: Select from preset speeds or enter a custom value
+  - **Decrease Button (-)**: Reduce speed in 0.05x increments
+  - **Increase Button (+)**: Increase speed in 0.05x increments
+- **Persistence**: Speed setting is saved between sessions
+- **Button Sizing**: Speed control buttons match the size of search navigation buttons for visual consistency
+
+## Keyboard Shortcuts
+
+### Navigation
+- **`Ctrl+F` / `Cmd+F`**: Toggle search bar
+- **`Ctrl+P` / `Cmd+P`**: Toggle playback controls
+- **`Ctrl+G` / `Cmd+G`**: Scroll to current position
+- **`Ctrl+O` / `Cmd+O`**: Open file import dialog
+
+### Search
+- **`Enter`**: Find next match
+- **`Shift+Enter`**: Find previous match
+- **`Escape`**: Close search bar
--- a/tests/widgets/audio_player_test.py
+++ b/tests/widgets/audio_player_test.py
@ -1,12 +1,19 @@
-
 import os
+import pytest

 from PyQt6.QtCore import QTime
 from PyQt6.QtMultimedia import QMediaPlayer
+from PyQt6.QtWidgets import QHBoxLayout
 from pytestqt.qtbot import QtBot

 from buzz.widgets.audio_player import AudioPlayer
 from tests.audio import test_audio_path
+from buzz.settings.settings import Settings
+
+
+def assert_approximately_equal(actual, expected, tolerance=0.001):
+    """Helper function to compare values with tolerance for floating-point precision"""
+    assert abs(actual - expected) < tolerance, f"Value {actual} is not approximately equal to {expected}"


 class TestAudioPlayer:
@ -42,3 +49,109 @@ class TestAudioPlayer:

        widget.on_playback_state_changed(QMediaPlayer.PlaybackState.StoppedState)
        assert widget.play_button.icon().themeName() == widget.play_icon.themeName()
+
+    def test_should_have_basic_audio_controls(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Speed controls were moved to transcription viewer - just verify basic audio player functionality
+        assert widget.play_button is not None
+        assert widget.scrubber is not None
+        assert widget.time_label is not None
+        
+        # Verify the widget loads audio correctly
+        assert widget.media_player is not None
+        assert os.path.normpath(widget.media_player.source().toLocalFile()) == os.path.normpath(test_audio_path)
+
+    def test_should_change_playback_rate_directly(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Speed controls moved to transcription viewer - test basic playback rate functionality
+        initial_rate = widget.media_player.playbackRate()
+        widget.media_player.setPlaybackRate(1.5)
+        assert_approximately_equal(widget.media_player.playbackRate(), 1.5)
+
+    def test_should_handle_custom_playback_rates(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Speed controls moved to transcription viewer - test basic playback rate functionality
+        widget.media_player.setPlaybackRate(1.7)
+        assert_approximately_equal(widget.media_player.playbackRate(), 1.7)
+
+    def test_should_handle_various_playback_rates(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Speed controls moved to transcription viewer - test basic playback rate functionality
+        # Test that the media player can handle various playback rates
+        widget.media_player.setPlaybackRate(0.5)
+        assert_approximately_equal(widget.media_player.playbackRate(), 0.5)
+        
+        widget.media_player.setPlaybackRate(2.0)
+        assert_approximately_equal(widget.media_player.playbackRate(), 2.0)
+
+    def test_should_use_single_row_layout(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Verify the layout structure
+        layout = widget.layout()
+        assert isinstance(layout, QHBoxLayout)
+        # Speed controls moved to transcription viewer - simplified layout
+        assert layout.count() == 3  # play_button, scrubber, time_label
+
+    def test_should_persist_playback_rate_setting(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Speed controls moved to transcription viewer - test that settings are loaded
+        # The widget should load the saved playback rate from settings
+        assert widget.settings is not None
+        saved_rate = widget.settings.value(Settings.Key.AUDIO_PLAYBACK_RATE, 1.0, float)
+        assert isinstance(saved_rate, float)
+        assert 0.1 <= saved_rate <= 5.0
+
+    def test_should_handle_range_looping(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Test range setting and looping functionality
+        widget.set_range((1000, 3000))  # 1-3 seconds
+        assert widget.range_ms == (1000, 3000)
+        
+        # Clear range
+        widget.clear_range()
+        assert widget.range_ms is None
+
+    def test_should_handle_invalid_media(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        widget.set_invalid_media(True)
+        
+        # Speed controls moved to transcription viewer - just verify invalid media handling
+        assert widget.invalid_media is True
+        assert widget.play_button.isEnabled() is False
+        assert widget.scrubber.isEnabled() is False
+        assert widget.time_label.isEnabled() is False
+
+    def test_should_stop_playback(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Test stop functionality
+        widget.stop()
+        assert widget.media_player.playbackState() == QMediaPlayer.PlaybackState.StoppedState
+
+    def test_should_handle_media_status_changes(self, qtbot: QtBot):
+        widget = AudioPlayer(test_audio_path)
+        qtbot.add_widget(widget)
+
+        # Test media status handling
+        widget.on_media_status_changed(QMediaPlayer.MediaStatus.LoadedMedia)
+        assert widget.invalid_media is False
+        
+        widget.on_media_status_changed(QMediaPlayer.MediaStatus.InvalidMedia)
+        assert widget.invalid_media is True
--- a/tests/widgets/shortcuts_editor_widget_test.py
+++ b/tests/widgets/shortcuts_editor_widget_test.py
@ -37,6 +37,9 @@ class TestShortcutsEditorWidget:
            (_("View Transcript Text"), "Ctrl+E"),
            (_("View Transcript Translation"), "Ctrl+L"),
            (_("View Transcript Timestamps"), "Ctrl+T"),
+            (_("Search Transcript"), "Ctrl+F"),
+            (_("Scroll to Current Text"), "Ctrl+G"),
+            (_("Toggle Playback Controls"), "Ctrl+P"),
            (_("Clear History"), "Ctrl+S"),
            (_("Cancel Transcription"), "Ctrl+X"),
        )
--- a/tests/widgets/transcription_viewer_test.py
+++ b/tests/widgets/transcription_viewer_test.py