diff --git a/buzz/settings/settings.py b/buzz/settings/settings.py index 23f96b06..12599b95 100644 --- a/buzz/settings/settings.py +++ b/buzz/settings/settings.py @@ -27,6 +27,7 @@ class Settings: RECORDING_TRANSCRIBER_MODE = "recording-transcriber/mode" RECORDING_TRANSCRIBER_SILENCE_THRESHOLD = "recording-transcriber/silence-threshold" RECORDING_TRANSCRIBER_LINE_SEPARATOR = "recording-transcriber/line-separator" + RECORDING_TRANSCRIBER_TRANSCRIPTION_STEP = "recording-transcriber/transcription-step" RECORDING_TRANSCRIBER_EXPORT_FILE_TYPE = "recording-transcriber/export-file-type" RECORDING_TRANSCRIBER_EXPORT_MAX_ENTRIES = "recording-transcriber/export-max-entries" RECORDING_TRANSCRIBER_EXPORT_FILE_NAME = "recording-transcriber/export-file-name" diff --git a/buzz/transcriber/recording_transcriber.py b/buzz/transcriber/recording_transcriber.py index 23ecf54c..9a86d5ad 100644 --- a/buzz/transcriber/recording_transcriber.py +++ b/buzz/transcriber/recording_transcriber.py @@ -61,10 +61,10 @@ class RecordingTranscriber(QObject): self.input_device_index = input_device_index self.sample_rate = sample_rate if sample_rate is not None else whisper_audio.SAMPLE_RATE self.model_path = model_path - self.n_batch_samples = 5 * self.sample_rate # 5 seconds + self.n_batch_samples = int(5 * self.sample_rate) # 5 seconds self.keep_sample_seconds = 0.15 if self.transcriber_mode == RecordingTranscriberMode.APPEND_AND_CORRECT: - self.n_batch_samples = 3 * self.sample_rate # 3 seconds + self.n_batch_samples = int(transcription_options.transcription_step * self.sample_rate) self.keep_sample_seconds = 1.5 # pause queueing if more than 3 batches behind self.max_queue_size = 3 * self.n_batch_samples diff --git a/buzz/transcriber/transcriber.py b/buzz/transcriber/transcriber.py index 0a52b54f..7e803e80 100644 --- a/buzz/transcriber/transcriber.py +++ b/buzz/transcriber/transcriber.py @@ -155,6 +155,7 @@ class TranscriptionOptions: llm_model: str = "" silence_threshold: float = 0.0025 line_separator: str = "\n\n" + transcription_step: float = 3.5 def humanize_language(language: str) -> str: diff --git a/buzz/widgets/recording_transcriber_widget.py b/buzz/widgets/recording_transcriber_widget.py index 622bf90e..6c658a92 100644 --- a/buzz/widgets/recording_transcriber_widget.py +++ b/buzz/widgets/recording_transcriber_widget.py @@ -160,6 +160,10 @@ class RecordingTranscriberWidget(QWidget): key=Settings.Key.RECORDING_TRANSCRIBER_LINE_SEPARATOR, default_value="\n\n", ), + transcription_step=self.settings.value( + key=Settings.Key.RECORDING_TRANSCRIBER_TRANSCRIPTION_STEP, + default_value=3.5, + ), ) self.audio_devices_combo_box = AudioDevicesComboBox(self) @@ -1174,3 +1178,7 @@ class RecordingTranscriberWidget(QWidget): Settings.Key.RECORDING_TRANSCRIBER_LINE_SEPARATOR, self.transcription_options.line_separator, ) + self.settings.set_value( + Settings.Key.RECORDING_TRANSCRIBER_TRANSCRIPTION_STEP, + self.transcription_options.transcription_step, + ) diff --git a/buzz/widgets/transcriber/advanced_settings_dialog.py b/buzz/widgets/transcriber/advanced_settings_dialog.py index 2a10f76f..144b8d9c 100644 --- a/buzz/widgets/transcriber/advanced_settings_dialog.py +++ b/buzz/widgets/transcriber/advanced_settings_dialog.py @@ -117,12 +117,6 @@ class AdvancedSettingsDialog(QDialog): self.silence_threshold_spin_box.valueChanged.connect(self.on_silence_threshold_changed) layout.addRow(_("Silence threshold:"), self.silence_threshold_spin_box) - self.line_separator_line_edit = QLineEdit(self) - line_sep_display = repr(transcription_options.line_separator)[1:-1] or r"\n\n" - self.line_separator_line_edit.setText(line_sep_display) - self.line_separator_line_edit.textChanged.connect(self.on_line_separator_changed) - layout.addRow(_("Line separator:"), self.line_separator_line_edit) - # Live recording mode self.recording_mode_combo = QComboBox(self) for mode in RecordingTranscriberMode: @@ -133,6 +127,26 @@ class AdvancedSettingsDialog(QDialog): self.recording_mode_combo.currentIndexChanged.connect(self.on_recording_mode_changed) layout.addRow(_("Live recording mode:"), self.recording_mode_combo) + self.line_separator_line_edit = QLineEdit(self) + line_sep_display = repr(transcription_options.line_separator)[1:-1] or r"\n\n" + self.line_separator_line_edit.setText(line_sep_display) + self.line_separator_line_edit.textChanged.connect(self.on_line_separator_changed) + self.line_separator_label = QLabel(_("Line separator:")) + layout.addRow(self.line_separator_label, self.line_separator_line_edit) + + self.transcription_step_spin_box = QDoubleSpinBox(self) + self.transcription_step_spin_box.setRange(2.0, 5.0) + self.transcription_step_spin_box.setSingleStep(0.1) + self.transcription_step_spin_box.setDecimals(1) + self.transcription_step_spin_box.setValue(transcription_options.transcription_step) + self.transcription_step_spin_box.valueChanged.connect(self.on_transcription_step_changed) + self.transcription_step_label = QLabel(_("Transcription step:")) + layout.addRow(self.transcription_step_label, self.transcription_step_spin_box) + + self._update_recording_mode_visibility( + RecordingTranscriberMode(self.recording_mode_combo.currentText()) + ) + # Export enabled checkbox self._export_enabled = self.settings.value( Settings.Key.RECORDING_TRANSCRIBER_EXPORT_ENABLED, False @@ -245,6 +259,19 @@ class AdvancedSettingsDialog(QDialog): def on_recording_mode_changed(self, index: int): self.settings.set_value(Settings.Key.RECORDING_TRANSCRIBER_MODE, index) + mode = list(RecordingTranscriberMode)[index] + self._update_recording_mode_visibility(mode) + + def _update_recording_mode_visibility(self, mode: RecordingTranscriberMode): + is_append_and_correct = mode == RecordingTranscriberMode.APPEND_AND_CORRECT + self.line_separator_label.setVisible(not is_append_and_correct) + self.line_separator_line_edit.setVisible(not is_append_and_correct) + self.transcription_step_label.setVisible(is_append_and_correct) + self.transcription_step_spin_box.setVisible(is_append_and_correct) + + def on_transcription_step_changed(self, value: float): + self.transcription_options.transcription_step = round(value, 1) + self.transcription_options_changed.emit(self.transcription_options) def on_export_enabled_changed(self, state: int): self._export_enabled = state == 2