diff --git a/buzz/locale/lv_LV/LC_MESSAGES/buzz.po b/buzz/locale/lv_LV/LC_MESSAGES/buzz.po index 9dc7fb8d..66860657 100644 --- a/buzz/locale/lv_LV/LC_MESSAGES/buzz.po +++ b/buzz/locale/lv_LV/LC_MESSAGES/buzz.po @@ -439,7 +439,7 @@ msgid "" "notes, just the translation." msgstr "" "Lūdzu, iztulko katru nosūtīto tekstu no angļu valodas spāņu valodā. " -"Tulkojums tiks izmantots automatizētā sistēmā, lūdzu, nepievienojiet nekādus " +"Tulkojums tiks izmantots automatizētā sistēmā, lūdzu, nepievieno nekādus " "komentārus vai piezīmes, tikai tulkojumu." #: buzz/widgets/transcriber/advanced_settings_dialog.py diff --git a/buzz/widgets/application.py b/buzz/widgets/application.py index 0448298c..8fad584b 100755 --- a/buzz/widgets/application.py +++ b/buzz/widgets/application.py @@ -34,6 +34,7 @@ class Application(QApplication): if darkdetect.isDark(): self.styleHints().setColorScheme(Qt.ColorScheme.Dark) + self.setStyleSheet("QCheckBox::indicator:unchecked { border: 1px solid white; }") if sys.platform.startswith("win"): self.setStyle(QStyleFactory.create("Fusion")) diff --git a/buzz/widgets/transcriber/advanced_settings_dialog.py b/buzz/widgets/transcriber/advanced_settings_dialog.py index 1959bbdd..a9e1a954 100644 --- a/buzz/widgets/transcriber/advanced_settings_dialog.py +++ b/buzz/widgets/transcriber/advanced_settings_dialog.py @@ -103,7 +103,7 @@ class AdvancedSettingsDialog(QDialog): self.silence_threshold_spin_box.setDecimals(4) self.silence_threshold_spin_box.setValue(transcription_options.silence_threshold) self.silence_threshold_spin_box.valueChanged.connect(self.on_silence_threshold_changed) - self.silence_threshold_spin_box.setFixedWidth(70) + self.silence_threshold_spin_box.setFixedWidth(90) layout.addRow(_("Silence threshold:"), self.silence_threshold_spin_box) # Live recording mode @@ -114,7 +114,7 @@ class AdvancedSettingsDialog(QDialog): self.settings.value(Settings.Key.RECORDING_TRANSCRIBER_MODE, 0) ) 
self.recording_mode_combo.currentIndexChanged.connect(self.on_recording_mode_changed) - self.recording_mode_combo.setFixedWidth(200) + self.recording_mode_combo.setFixedWidth(250) layout.addRow(_("Live recording mode") + ":", self.recording_mode_combo) self.line_separator_line_edit = QLineEdit(self) @@ -210,7 +210,7 @@ class AdvancedSettingsDialog(QDialog): self.export_max_entries_spin.setValue(max_entries) self.export_max_entries_spin.setEnabled(self._export_enabled) self.export_max_entries_spin.valueChanged.connect(self.on_export_max_entries_changed) - self.export_max_entries_spin.setFixedWidth(70) + self.export_max_entries_spin.setFixedWidth(90) self.export_max_entries_label = QLabel(_("Limit export entries\n(0 = export all):")) self.export_max_entries_label.setEnabled(self._export_enabled) layout.addRow(self.export_max_entries_label, self.export_max_entries_spin) diff --git a/docs/docs/usage/1_file_import.md b/docs/docs/usage/1_file_import.md index a811fddc..8b1086a8 100644 --- a/docs/docs/usage/1_file_import.md +++ b/docs/docs/usage/1_file_import.md @@ -16,11 +16,11 @@ title: File Import To reduce misspellings you can pass some commonly misspelled words in an `Initial prompt` that is available under `Advanced...` button. See this [guide on prompting](https://cookbook.openai.com/examples/whisper_prompting_guide#pass-names-in-the-prompt-to-prevent-misspellings). -| Field | Options | Default | Description | -| ------------------ | ------------------- | ------- |---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Export As | "TXT", "SRT", "VTT" | "TXT" | Export file format | -| Word-Level Timings | Off / On | Off | If checked, the transcription will generate a separate subtitle line for each word in the audio. 
Combine words into subtitles afterwards with the [resize option](https://chidiwilliams.github.io/buzz/docs/usage/edit_and_resize). | -| Extract speech | Off / On | Off | If checked, speech will be extracted to a separate audio tack to improve accuracy. Available since 1.3.0. | +| Field | Options | Default | Description | +| ------------------ | ------------------- | ------- |--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Export As | "TXT", "SRT", "VTT" | "TXT" | Export file format | +| Word-Level Timings | Off / On | Off | If checked, the transcription will generate a separate subtitle line for each word in the audio. Combine words into subtitles afterwards with the [resize option](https://chidiwilliams.github.io/buzz/docs/usage/edit_and_resize). | +| Extract speech | Off / On | Off | If checked, speech will be extracted to a separate audio track to improve accuracy. | (See the [Live Recording section](https://chidiwilliams.github.io/buzz/docs/usage/live_recording) for more information about the task, language, and quality settings.) diff --git a/docs/docs/usage/2_live_recording.md b/docs/docs/usage/2_live_recording.md index b2dd1733..750c5874 100644 --- a/docs/docs/usage/2_live_recording.md +++ b/docs/docs/usage/2_live_recording.md @@ -20,12 +20,16 @@ To start a live recording: #### Advanced preferences **Silence threshold** Set threshold to for transcriptions to be processed. If average volume level is under this setting the sentence will not be transcribed. Available since 1.4.4. + **Line separator** Marking to add to the transcription and translation lines. Default value is two new lines (`\n\n`) that result in an empty space between translation or transcription lines. To have no empty line use `\n`. Available since 1.4.4. 
+**Transcription step** If live recording mode is set to `Append and correct`, you can also set a transcription step. Shorter steps will reduce latency but cause a larger load on the system. Monitor the `Queue` while transcribing in this mode; if it grows too much, increase the transcription step to reduce load. Available since 1.4.4. + +**Hide unconfirmed** If live recording mode is set to `Append and correct`, you can also hide the unconfirmed part of the last transcript. This part may be incorrect as Buzz has seen it only in one overlapping transcription segment. Hiding it will increase latency, but the result will show only the correct transcripts. Available since 1.4.4. #### Presentation Window -Since 1.4.2 Buzz has an easy to use presentation window you can use to show live transcriptions during events and presentations. To open it start the recording and new options for the `Presentation window` will appear. +Buzz has an easy to use presentation window you can use to show live transcriptions during events and presentations. To open it start the recording and new options for the `Presentation window` will appear. ### Record audio playing from computer (macOS) diff --git a/docs/docs/usage/3_translations.md b/docs/docs/usage/3_translations.md index 84ba4a54..44ed71f5 100644 --- a/docs/docs/usage/3_translations.md +++ b/docs/docs/usage/3_translations.md @@ -2,7 +2,7 @@ title: Translations --- -Default `Translation` task uses Whisper model ability to translate to English, however `Large-V3-Turbo` is not compatible with this standard. Since version `1.0.0` Buzz supports additional AI translations to any other language. +Default `Translation` task uses Whisper model ability to translate to English, however `Large-V3-Turbo` is not compatible with this standard. Buzz supports additional AI translations to any other language. To use translation feature you will need to configure OpenAI API key and translation settings. Set OpenAI API ket in Preferences. 
Buzz also supports custom locally running translation AIs that support OpenAI API. For more information on locally running AIs see [ollama](https://ollama.com/blog/openai-compatibility) or [LM Studio](https://lmstudio.ai/). For information on available custom APIs see this [discussion thread](https://github.com/chidiwilliams/buzz/discussions/827). diff --git a/docs/docs/usage/4_edit_and_resize.md b/docs/docs/usage/4_edit_and_resize.md index 7bdf92bb..4231d8db 100644 --- a/docs/docs/usage/4_edit_and_resize.md +++ b/docs/docs/usage/4_edit_and_resize.md @@ -8,6 +8,6 @@ When transcript of some audio or video file is generated you can edit it and exp Transcription view screen has option to resize the transcripts. Click on the "Resize" button so see available options. Transcripts that have been generated **with word-level timings** setting enabled can be combined into subtitles specifying different options, like maximum length of a subtitle and if subtitles should be split on punctuation. For transcripts that have been generated **without word-level timings** setting enabled can only be recombined specifying desired max length of a subtitle. -If audio file is still present on the system word-level timing merge will also analyze the audio for silences to improve subtitle accuracy. Subtitle generation from transcripts with word-level timings is available since version 1.3.0. +If audio file is still present on the system word-level timing merge will also analyze the audio for silences to improve subtitle accuracy. The resize tool also has an option to extend end time of segments if you want the subtitles to be on the screen for longer. You can specify the amount of time in seconds to extend each subtitle segment. Buzz will add this amount of time to the end of each subtitle segment making sure that the end of a segment does not go over start of the next segment. This feature is available since 1.4.3. 
\ No newline at end of file diff --git a/docs/docs/usage/5_speaker_identification.md b/docs/docs/usage/5_speaker_identification.md index 72dc7ee6..a8c8c097 100644 --- a/docs/docs/usage/5_speaker_identification.md +++ b/docs/docs/usage/5_speaker_identification.md @@ -6,4 +6,4 @@ When transcript of some audio or video file is generated you can identify speake Transcription view screen has option to identify speakers. Click on the "Identify speakers" button so see available options. -If audio file is still present on the system speaker identification will mark each speakers sentences with appropriate label. You can preview 10 seconds of some random sentence of the identified speaker and rename the automatically identified label to speakers real name. If "Merge speaker sentences" checkbox is selected when you save the speaker labels, all consecutive sentences of the same speaker will be merged into one segment. Speaker identification is available since version 1.4.0 on all platforms except Intel macOS. \ No newline at end of file +If audio file is still present on the system speaker identification will mark each speakers sentences with appropriate label. You can preview 10 seconds of some random sentence of the identified speaker and rename the automatically identified label to speakers real name. If "Merge speaker sentences" checkbox is selected when you save the speaker labels, all consecutive sentences of the same speaker will be merged into one segment. Speaker identification is not available on Intel macOS. 
\ No newline at end of file diff --git a/docs/docs/usage/5_transcription_viewer.md b/docs/docs/usage/5_transcription_viewer.md index a4c9c9e2..c075b9b3 100644 --- a/docs/docs/usage/5_transcription_viewer.md +++ b/docs/docs/usage/5_transcription_viewer.md @@ -9,7 +9,7 @@ The transcription viewer is organized into several key sections: - **Top Toolbar**: Contains view mode, export, translate, resize, and search - **Search Bar**: Find and navigate through transcript text - **Transcription Segments**: Table view of all transcription segments with timestamps -- **Playback Controls**: Audio playback settings and speed controls (since version 1.3.0) +- **Playback Controls**: Audio playback settings and speed controls - **Audio Player**: Standard media player with progress bar - **Current Segment Display**: Shows the currently selected or playing segment @@ -37,25 +37,21 @@ The transcription viewer is organized into several key sections: - **More information**: See [Edit and Resize](https://chidiwilliams.github.io/buzz/docs/usage/edit_and_resize) section ### Playback Controls Button -(since version 1.3.0) - **Function**: Show/hide playback control panel - **Shortcut**: `Ctrl+Alt+P` (Windows/Linux) or `Cmd+Alt+P` (macOS) - **Behavior**: Toggle button that shows/hides the playback controls below ### Find Button -(since version 1.3.0) - **Function**: Show/hide search functionality - **Shortcut**: `Ctrl+F` (Windows/Linux) or `Cmd+F` (macOS) - **Behavior**: Toggle button that shows/hides the search bar ### Scroll to Current Button -(since version 1.3.0) - **Function**: Automatically scroll to the currently playing text - **Shortcut**: `Ctrl+G` (Windows/Linux) or `Cmd+G` (macOS) - **Usage**: Click to jump to the current audio position in the transcript ## Search Functionality -(since version 1.3.0) ### Search Bar The search bar appears below the toolbar when activated and provides: @@ -80,7 +76,6 @@ The search bar appears below the toolbar when activated and provides: - 
**Cross-view Search**: Works in all view modes (Timestamps, Text, Translation) ## Playback Controls -(since version 1.3.0) ### Loop Segment - **Function**: Automatically loop playback of selected segments @@ -105,7 +100,6 @@ The search bar appears below the toolbar when activated and provides: - **Button Sizing**: Speed control buttons match the size of search navigation buttons for visual consistency ## Keyboard Shortcuts -(since version 1.3.0) ### Audio Playback - **`Ctrl+P` / `Cmd+P`**: Play/Pause audio