Merge branch 'main' of https://github.com/chidiwilliams/buzz into 323-whispercpp-transcription-crashing-on-mac-m1

This commit is contained in:
Chidi Williams 2023-01-24 19:31:50 +00:00
commit 21e1c34638
6 changed files with 157 additions and 62 deletions

View file

@ -21,6 +21,7 @@ jobs:
include:
- os: macos-latest
- os: windows-latest
- os: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
@ -63,7 +64,16 @@ jobs:
run: poetry config experimental.new-installer false && poetry install
- name: Test
run: poetry run make test
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt install libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xfixes0 x11-utils
/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1920x1200x24 -ac +extension GLX
sudo apt update
sudo apt install -y libpulse-mainloop-glib0 libegl1-mesa-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libportaudio2
fi
poetry run make test
shell: bash
- name: Upload coverage reports to Codecov with GitHub Action
uses: codecov/codecov-action@v3
@ -78,6 +88,7 @@ jobs:
include:
- os: macos-latest
- os: windows-latest
- os: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
@ -140,6 +151,10 @@ jobs:
elif [ "$RUNNER_OS" == "Linux" ]; then
sudo apt install libxkbcommon-x11-0 libxcb-icccm4 libxcb-image0 libxcb-keysyms1 libxcb-randr0 libxcb-render-util0 libxcb-xinerama0 libxcb-xfixes0 x11-utils
/sbin/start-stop-daemon --start --quiet --pidfile /tmp/custom_xvfb_99.pid --make-pidfile --background --exec /usr/bin/Xvfb -- :99 -screen 0 1920x1200x24 -ac +extension GLX
sudo apt update
sudo apt install -y libpulse-mainloop-glib0 libegl1-mesa-dev libgstreamer-plugins-base1.0-dev libgstreamer1.0-dev libportaudio2 gettext
poetry run make bundle_linux
fi
@ -160,6 +175,7 @@ jobs:
path: |
dist/Buzz*-windows.tar.gz
dist/Buzz*-windows.exe
dist/Buzz*-unix.tar.gz
dist/Buzz-*.dmg
bundle_mac_arm:
@ -295,6 +311,7 @@ jobs:
with:
files: |
Buzz*-windows.tar.gz
Buzz*-unix.tar.gz
Buzz*-windows.exe
Buzz-*.dmg

View file

@ -24,21 +24,31 @@ OpenAI's [Whisper](https://github.com/openai/whisper).
## Installation
To install Buzz, download the [latest version](https://github.com/chidiwilliams/buzz/releases/latest) for your operating
system. Buzz is available on **Mac** and **Windows**.
system. Buzz is available on **Mac**, **Windows**, and **Linux**.
### Mac (macOS 11.7 and above)
### Mac (macOS 11.7 and later)
Install via [brew](https://brew.sh/):
- Install via [brew](https://brew.sh/):
```shell
brew install --cask buzz
```
```shell
brew install --cask buzz
```
Or, download and run the `Buzz-x.y.z.dmg` file.
Alternatively, download and run the `Buzz-x.y.z.dmg` file.
### Windows
### Windows (Windows 10 and later)
Download and run the `Buzz-x.y.z.exe` file.
- Download and run the `Buzz-x.y.z.exe` file.
### Linux (Ubuntu 20.04 and later)
- Install dependencies:
```shell
sudo apt-get install libportaudio2
```
- Download and extract the `Buzz-x.y.z-unix.tar.gz` file.
## How to use
@ -97,10 +107,12 @@ and [Virtual Audio Cable](https://vac.muzychenko.net/en/)).
To import a file:
- Click Import on the File menu (or **Command + O** on Mac, **Ctrl + O** on Windows).
- Choose an audio or video file. Supported formats: "mp3", "wav", "m4a", "ogg", "mp4", "webm", "ogm".
- Select a task, language, quality, and export format.
- Click Import Media File on the File menu (or the '+' icon on the toolbar, or **Command/Ctrl + O**).
- Choose an audio or video file.
- Select a task, language, and the model settings.
- Click Run.
- When the transcription status shows 'Completed', double-click on the row (or select the row and click the '⤢' icon) to
open the transcription.
| Field | Options | Default | Description |
|--------------------|---------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------|

View file

@ -334,11 +334,14 @@ class TranscriptionViewerWidget(QWidget):
transcription_task: FileTranscriptionTask
def __init__(
self, transcription_task: FileTranscriptionTask, parent: Optional['QWidget'] = None,
self, transcription_task: FileTranscriptionTask,
open_transcription_output=True,
parent: Optional['QWidget'] = None,
flags: Qt.WindowType = Qt.WindowType.Widget,
) -> None:
super().__init__(parent, flags)
self.transcription_task = transcription_task
self.open_transcription_output = open_transcription_output
self.setMinimumWidth(500)
self.setMinimumHeight(500)
@ -387,7 +390,7 @@ class TranscriptionViewerWidget(QWidget):
return
write_output(path=output_file_path, segments=self.transcription_task.segments,
should_open=True, output_format=output_format)
should_open=self.open_transcription_output, output_format=output_format)
class AdvancedSettingsButton(QPushButton):
@ -778,7 +781,6 @@ class TranscriptionTasksTableWidget(QTableWidget):
self.setSelectionBehavior(
QAbstractItemView.SelectionBehavior.SelectRows)
self.setSelectionMode(QAbstractItemView.SelectionMode.SingleSelection)
def upsert_task(self, task: FileTranscriptionTask):
task_row_index = self.task_row_index(task.id)
@ -830,7 +832,8 @@ class TranscriptionTasksTableWidget(QTableWidget):
@staticmethod
def find_task_id(index: QModelIndex):
return int(index.siblingAtColumn(TranscriptionTasksTableWidget.TASK_ID_COLUMN_INDEX).data())
sibling_index = index.siblingAtColumn(TranscriptionTasksTableWidget.TASK_ID_COLUMN_INDEX).data()
return int(sibling_index) if sibling_index is not None else None
class MainWindowToolbar(QToolBar):
@ -985,23 +988,30 @@ class MainWindow(QMainWindow):
task.status == FileTranscriptionTask.Status.FAILED
def on_clear_history_action_triggered(self):
for task_id, task in list(self.tasks.items()):
if self.task_completed_or_errored(task):
selected_rows = self.table_widget.selectionModel().selectedRows()
if len(selected_rows) == 0:
return
reply = QMessageBox.question(
self, _('Clear History'),
_('Are you sure you want to delete the selected transcription(s)? This action cannot be undone.'))
if reply == QMessageBox.StandardButton.Yes:
task_ids = [TranscriptionTasksTableWidget.find_task_id(selected_row) for selected_row in selected_rows]
for task_id in task_ids:
self.table_widget.clear_task(task_id)
self.tasks.pop(task_id)
self.tasks_changed.emit()
def on_stop_transcription_action_triggered(self):
selected_rows = self.table_widget.selectionModel().selectedRows()
if len(selected_rows) == 0:
return
task_id = TranscriptionTasksTableWidget.find_task_id(selected_rows[0])
task = self.tasks[task_id]
for selected_row in selected_rows:
task_id = TranscriptionTasksTableWidget.find_task_id(selected_row)
task = self.tasks[task_id]
task.status = FileTranscriptionTask.Status.CANCELED
self.tasks_changed.emit()
self.transcriber_worker.cancel_task(task_id)
self.table_widget.upsert_task(task)
task.status = FileTranscriptionTask.Status.CANCELED
self.tasks_changed.emit()
self.transcriber_worker.cancel_task(task_id)
self.table_widget.upsert_task(task)
def on_new_transcription_action_triggered(self):
(file_paths, __) = QFileDialog.getOpenFileNames(
@ -1017,27 +1027,34 @@ class MainWindow(QMainWindow):
def on_open_transcript_action_triggered(self):
selected_rows = self.table_widget.selectionModel().selectedRows()
if len(selected_rows) == 0:
return
task_id = TranscriptionTasksTableWidget.find_task_id(selected_rows[0])
self.open_transcription_viewer(task_id)
for selected_row in selected_rows:
task_id = TranscriptionTasksTableWidget.find_task_id(selected_row)
self.open_transcription_viewer(task_id)
def on_table_selection_changed(self):
enable_open_transcript_action = self.should_enable_open_transcript_action()
self.toolbar.set_open_transcript_action_enabled(enable_open_transcript_action)
self.toolbar.set_open_transcript_action_enabled(self.should_enable_open_transcript_action())
self.toolbar.set_stop_transcription_action_enabled(self.should_enable_stop_transcription_action())
self.toolbar.set_clear_history_action_enabled(self.should_enable_clear_history_action())
def should_enable_open_transcript_action(self):
return self.selected_task_has_status([FileTranscriptionTask.Status.COMPLETED])
return self.selected_tasks_have_status([FileTranscriptionTask.Status.COMPLETED])
def selected_task_has_status(self, statuses: List[FileTranscriptionTask.Status]):
def should_enable_stop_transcription_action(self):
return self.selected_tasks_have_status(
[FileTranscriptionTask.Status.IN_PROGRESS, FileTranscriptionTask.Status.QUEUED])
def should_enable_clear_history_action(self):
return self.selected_tasks_have_status(
[FileTranscriptionTask.Status.COMPLETED, FileTranscriptionTask.Status.FAILED,
FileTranscriptionTask.Status.CANCELED])
def selected_tasks_have_status(self, statuses: List[FileTranscriptionTask.Status]):
selected_rows = self.table_widget.selectionModel().selectedRows()
if len(selected_rows) == 1:
task_id = TranscriptionTasksTableWidget.find_task_id(selected_rows[0])
if self.tasks[task_id].status in statuses:
return True
return False
if len(selected_rows) == 0:
return False
return all(
[self.tasks[TranscriptionTasksTableWidget.find_task_id(selected_row)].status in statuses for selected_row in
selected_rows])
def on_table_double_clicked(self, index: QModelIndex):
task_id = TranscriptionTasksTableWidget.find_task_id(index)
@ -1066,17 +1083,9 @@ class MainWindow(QMainWindow):
self.tasks_cache.save(list(self.tasks.values()))
def on_tasks_changed(self):
self.toolbar.set_clear_history_action_enabled(
any([self.task_completed_or_errored(task) for task in self.tasks.values()]))
enable_open_transcript_action = self.should_enable_open_transcript_action()
self.toolbar.set_open_transcript_action_enabled(enable_open_transcript_action)
self.toolbar.set_open_transcript_action_enabled(self.should_enable_open_transcript_action())
self.toolbar.set_stop_transcription_action_enabled(self.should_enable_stop_transcription_action())
def should_enable_stop_transcription_action(self):
return self.selected_task_has_status(
[FileTranscriptionTask.Status.IN_PROGRESS, FileTranscriptionTask.Status.QUEUED])
self.toolbar.set_clear_history_action_enabled(self.should_enable_clear_history_action())
def closeEvent(self, event: QtGui.QCloseEvent) -> None:
self.transcriber_worker.stop()

View file

@ -486,8 +486,11 @@ def write_output(path: str, segments: List[Segment], should_open: bool, output_f
f'{to_timestamp(segment.start, ms_separator=",")} --> {to_timestamp(segment.end, ms_separator=",")}\n')
file.write(f'{segment.text}\n\n')
logging.debug('Written transcription output')
if should_open:
try:
logging.debug('Opening transcription output')
os.startfile(path)
except AttributeError:
opener = "open" if platform.system() == "Darwin" else "xdg-open"

View file

@ -29,6 +29,9 @@ if platform.system() == 'Windows':
os.add_dll_directory(app_dir)
if __name__ == "__main__":
if platform.system() == 'Linux':
multiprocessing.set_start_method('spawn')
# Fixes opening new window when app has been frozen on Windows:
# https://stackoverflow.com/a/33979091
multiprocessing.freeze_support()

View file

@ -1,4 +1,5 @@
import logging
import multiprocessing
import os.path
import pathlib
import platform
@ -27,6 +28,9 @@ from buzz.transcriber import (FileTranscriptionOptions, FileTranscriptionTask,
from tests.mock_sounddevice import MockInputStream, mock_query_devices
from .mock_qt import MockNetworkAccessManager, MockNetworkReply
if platform.system() == 'Linux':
multiprocessing.set_start_method('spawn')
@pytest.fixture(scope='module', autouse=True)
def audio_setup():
@ -148,13 +152,13 @@ class TestMainWindow:
window = MainWindow(tasks_cache=tasks_cache)
qtbot.add_widget(window)
self.start_new_transcription(window)
self._start_new_transcription(window)
open_transcript_action = self.get_toolbar_action(window, 'Open Transcript')
open_transcript_action = self._get_toolbar_action(window, 'Open Transcript')
assert open_transcript_action.isEnabled() is False
table_widget: QTableWidget = window.findChild(QTableWidget)
qtbot.wait_until(self.assert_task_status(table_widget, 0, 'Completed'), timeout=2 * 60 * 1000)
qtbot.wait_until(self._assert_task_status(table_widget, 0, 'Completed'), timeout=2 * 60 * 1000)
table_widget.setCurrentIndex(table_widget.indexFromItem(table_widget.item(0, 1)))
assert open_transcript_action.isEnabled()
@ -163,7 +167,7 @@ class TestMainWindow:
window = MainWindow(tasks_cache=tasks_cache)
qtbot.add_widget(window)
self.start_new_transcription(window)
self._start_new_transcription(window)
table_widget: QTableWidget = window.findChild(QTableWidget)
@ -178,7 +182,7 @@ class TestMainWindow:
table_widget.selectRow(0)
window.toolbar.stop_transcription_action.trigger()
qtbot.wait_until(self.assert_task_status(table_widget, 0, 'Canceled'), timeout=60 * 1000)
qtbot.wait_until(self._assert_task_status(table_widget, 0, 'Canceled'), timeout=60 * 1000)
table_widget.selectRow(0)
assert window.toolbar.stop_transcription_action.isEnabled() is False
@ -207,10 +211,57 @@ class TestMainWindow:
assert window.toolbar.open_transcript_action.isEnabled() is False
window.close()
def start_new_transcription(self, window: MainWindow):
@pytest.mark.parametrize('tasks_cache', [mock_tasks], indirect=True)
def test_should_clear_history_with_rows_selected(self, qtbot, tasks_cache):
window = MainWindow(tasks_cache=tasks_cache)
table_widget: QTableWidget = window.findChild(QTableWidget)
table_widget.selectAll()
with patch('PyQt6.QtWidgets.QMessageBox.question') as question_message_box_mock:
question_message_box_mock.return_value = QMessageBox.StandardButton.Yes
window.toolbar.clear_history_action.trigger()
assert table_widget.rowCount() == 0
window.close()
@pytest.mark.parametrize('tasks_cache', [mock_tasks], indirect=True)
def test_should_have_clear_history_action_disabled_with_no_rows_selected(self, qtbot, tasks_cache):
window = MainWindow(tasks_cache=tasks_cache)
qtbot.add_widget(window)
assert window.toolbar.clear_history_action.isEnabled() is False
window.close()
@pytest.mark.parametrize('tasks_cache', [mock_tasks], indirect=True)
def test_should_open_transcription_viewer(self, qtbot, tasks_cache):
window = MainWindow(tasks_cache=tasks_cache)
qtbot.add_widget(window)
table_widget: QTableWidget = window.findChild(QTableWidget)
table_widget.selectRow(0)
window.toolbar.open_transcript_action.trigger()
transcription_viewer = window.findChild(TranscriptionViewerWidget)
assert transcription_viewer is not None
window.close()
@pytest.mark.parametrize('tasks_cache', [mock_tasks], indirect=True)
def test_should_have_open_transcript_action_disabled_with_no_rows_selected(self, qtbot, tasks_cache):
window = MainWindow(tasks_cache=tasks_cache)
qtbot.add_widget(window)
assert window.toolbar.open_transcript_action.isEnabled() is False
window.close()
@staticmethod
def _start_new_transcription(window: MainWindow):
with patch('PyQt6.QtWidgets.QFileDialog.getOpenFileNames') as open_file_names_mock:
open_file_names_mock.return_value = ([get_test_asset('whisper-french.mp3')], '')
new_transcription_action = self.get_toolbar_action(window, 'New Transcription')
new_transcription_action = TestMainWindow._get_toolbar_action(window, 'New Transcription')
new_transcription_action.trigger()
file_transcriber_widget: FileTranscriberWidget = window.findChild(FileTranscriberWidget)
@ -218,7 +269,7 @@ class TestMainWindow:
run_button.click()
@staticmethod
def assert_task_status(table_widget: QTableWidget, row_index: int, expected_status: str):
def _assert_task_status(table_widget: QTableWidget, row_index: int, expected_status: str):
def assert_task_canceled():
assert table_widget.rowCount() > 0
assert table_widget.item(row_index, 1).text() == 'whisper-french.mp3'
@ -227,7 +278,7 @@ class TestMainWindow:
return assert_task_canceled
@staticmethod
def get_toolbar_action(window: MainWindow, text: str):
def _get_toolbar_action(window: MainWindow, text: str):
toolbar: QToolBar = window.findChild(QToolBar)
return [action for action in toolbar.actions() if action.text() == text][0]
@ -322,7 +373,7 @@ class TestTranscriptionViewerWidget:
transcription_options=TranscriptionOptions(),
segments=[Segment(40, 299, 'Bien'),
Segment(299, 329, 'venue dans')],
model_path=''))
model_path=''), open_transcription_output=False)
qtbot.add_widget(widget)
assert widget.windowTitle() == 'whisper-french.mp3'
@ -342,7 +393,7 @@ class TestTranscriptionViewerWidget:
transcription_options=TranscriptionOptions(),
segments=[Segment(40, 299, 'Bien'),
Segment(299, 329, 'venue dans')],
model_path=''))
model_path=''), open_transcription_output=False)
qtbot.add_widget(widget)
export_button = widget.findChild(QPushButton)