Adding ability to resize subtitles (#888)

This commit is contained in:
Raivis Dejus 2024-08-17 13:03:28 +03:00 committed by GitHub
commit 717f855288
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 225 additions and 22 deletions

View file

@ -155,6 +155,10 @@ jobs:
if [ "$RUNNER_OS" == "macOS" ]; then
brew install create-dmg
# kill XProtect to prevent https://github.com/actions/runner-images/issues/7522
sudo pkill -9 XProtect >/dev/null || true;
while pgrep XProtect; do sleep 3; done;
# create variables
CERTIFICATE_PATH=$RUNNER_TEMP/build_certificate.p12

View file

@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<svg width="800px" height="800px" viewBox="0 0 16 16" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<rect width="16" height="16" id="icon-bound" fill="none" />
<path d="M2,8L8,8L8,14L6,14L6,11.4L1.4,16L-0.014,14.586L4.572,10L2,10L2,8ZM8,2L10,2L10,4.6L14.6,0L16.014,1.414L11.428,6L14,6L14,8L8,8L8,2Z" />
</svg>

After

Width:  |  Height:  |  Size: 399 B

View file

@ -25,6 +25,18 @@ class TranscriptionSegmentDAO(DAO[TranscriptionSegment]):
query.bindValue(":transcription_id", str(transcription_id))
return self._execute_all(query)
def delete_segments(self, transcription_id: UUID):
    """Delete every segment row that belongs to one transcription.

    Args:
        transcription_id: Identifier of the transcription whose segments
            are removed. It is bound to the query in its string form.

    Raises:
        Exception: Carrying the query's last error text when the
            statement fails to execute.
    """
    statement = self._create_query()
    statement.prepare(
        f"""
        DELETE FROM {self.table}
        WHERE transcription_id = :transcription_id
        """
    )
    statement.bindValue(":transcription_id", str(transcription_id))

    # Success path first; anything else surfaces the driver error text.
    if statement.exec():
        return
    raise Exception(statement.lastError().text())
def update_segment_translation(self, segment_id: int, translation: str):
query = self._create_query()
query.prepare(

View file

@ -35,5 +35,6 @@ def _setup_db(path: str) -> QSqlDatabase:
db.setDatabaseName(path)
if not db.open():
raise RuntimeError(f"Failed to open database connection: {db.databaseName()}")
db.exec('PRAGMA foreign_keys = ON')
logging.debug("Database connection opened: %s", db.databaseName())
return db

View file

@ -44,6 +44,19 @@ class TranscriptionService:
)
)
def replace_transcription_segments(self, id: UUID, segments: List[Segment]):
    """Swap the stored segments of a transcription for a new list.

    All existing segments of the transcription are deleted first, then one
    row is inserted per entry of ``segments``. Inserted rows always carry an
    empty translation.

    Args:
        id: Identifier of the transcription whose segments are replaced.
        segments: New segments; their ``start``, ``end`` and ``text``
            attributes are copied into the inserted rows.
    """
    dao = self.transcription_segment_dao
    dao.delete_segments(id)

    owner_id = str(id)
    for item in segments:
        row = TranscriptionSegment(
            start_time=item.start,
            end_time=item.end,
            text=item.text,
            translation='',
            transcription_id=owner_id,
        )
        dao.insert(row)
def get_transcription_segments(self, transcription_id: UUID):
return self.transcription_segment_dao.get_segments(transcription_id)

View file

@ -8,8 +8,8 @@ msgid ""
msgstr ""
"Project-Id-Version: \n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2024-07-11 19:46+0300\n"
"PO-Revision-Date: 2024-07-11 19:48+0300\n"
"POT-Creation-Date: 2024-08-17 11:00+0300\n"
"PO-Revision-Date: 2024-08-17 11:02+0300\n"
"Last-Translator: \n"
"Language-Team: \n"
"Language: lv_LV\n"
@ -29,12 +29,14 @@ msgstr "https://example.com/audio.mp3"
#: buzz/widgets/import_url_dialog.py:28
#: buzz/widgets/preferences_dialog/preferences_dialog.py:69
#: buzz/widgets/transcriber/advanced_settings_dialog.py:97
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:290
#: buzz/widgets/main_window.py:234
msgid "Ok"
msgstr "Labi"
#: buzz/widgets/import_url_dialog.py:29
#: buzz/widgets/preferences_dialog/preferences_dialog.py:70
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:291
#: buzz/widgets/main_window.py:235
msgid "Cancel"
msgstr "Atcelt"
@ -114,7 +116,7 @@ msgid ""
msgstr ""
"OpenAI API atbilde ir nederīga. Lūdzu pārbaudiet API Adresi un savu atslēgu. "
"Atpazīšana un tulkošana joprojām var strādāt, ja API neatbalsta atslēgu "
"pārbaudi. "
"pārbaudi."
#: buzz/widgets/preferences_dialog/folder_watch_preferences_widget.py:42
msgid "Enable folder watch"
@ -201,7 +203,7 @@ msgid "Download failed"
msgstr "Lejupielāde neizdevās"
#: buzz/widgets/preferences_dialog/models_preferences_widget.py:259
#: buzz/widgets/main_window.py:291 buzz/model_loader.py:462
#: buzz/widgets/main_window.py:291 buzz/model_loader.py:478
msgid "Error"
msgstr "Kļūda"
@ -378,11 +380,11 @@ msgstr "Gaida MI tulkojumu..."
msgid "Microphone:"
msgstr "Mikrofons:"
#: buzz/widgets/recording_transcriber_widget.py:413
#: buzz/widgets/recording_transcriber_widget.py:414
msgid "An error occurred while starting a new recording:"
msgstr "Sākot jaunu ierakstu notikusi kļūda:"
#: buzz/widgets/recording_transcriber_widget.py:417
#: buzz/widgets/recording_transcriber_widget.py:418
msgid ""
"Please check your audio devices or check the application logs for more "
"information."
@ -427,23 +429,35 @@ msgstr "Skats"
msgid "Timestamps"
msgstr "Laiks"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:140
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:160
msgid "Export"
msgstr "Eksportēt"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:154
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:174
#: buzz/transcriber/transcriber.py:24
msgid "Translate"
msgstr "Tulkot"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:244
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:184
msgid "Resize"
msgstr "Mainīt garumu"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:274
msgid "API Key Required"
msgstr "API atslēgas kļūda"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:245
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:275
msgid "Please enter OpenAI API Key in preferences"
msgstr "Lūdzu ievadiet OpenAI API atslēgu iestatījumos"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:292
msgid "Desired subtitle length"
msgstr "Vēlamais teksta garums"
#: buzz/widgets/transcription_viewer/transcription_viewer_widget.py:293
msgid "Enter target characters per subtitle:"
msgstr "Ievadiet vēlamo simbolu skaitu tekstā:"
#: buzz/widgets/transcription_viewer/export_transcription_menu.py:76
msgid "Save File"
msgstr "Saglabāt failu"
@ -525,7 +539,7 @@ msgstr "Neizdevās saglabāt OpenAI API atslēgu atslēgu saišķī"
msgid "Transcribe"
msgstr "Atpazīt"
#: buzz/model_loader.py:490
#: buzz/model_loader.py:507
msgid "A connection error occurred"
msgstr "Notika savienojuma kļūda"

View file

@ -78,6 +78,9 @@ class TranslateIcon(Icon):
def __init__(self, parent: QWidget):
super().__init__(get_path("assets/translate_black.svg"), parent)
class ResizeIcon(Icon):
    """Icon built from the ``assets/resize_black.svg`` asset."""

    def __init__(self, parent: QWidget):
        """Resolve the asset path and hand it, with ``parent``, to ``Icon``."""
        icon_path = get_path("assets/resize_black.svg")
        super().__init__(icon_path, parent)
class VisibilityIcon(Icon):
def __init__(self, parent: QWidget):

View file

@ -123,16 +123,19 @@ class TranscriptionSegmentsEditorWidget(QTableView):
# Show start before end
self.horizontalHeader().swapSections(1, 2)
font_metrics = QFontMetrics(self.font())
max_row_height = font_metrics.height() * 4
for row in range(self.model().rowCount()):
self.setRowHeight(row, max_row_height)
self.init_row_height()
self.setColumnWidth(Column.START.value, 95)
self.setColumnWidth(Column.END.value, 95)
self.setWordWrap(True)
def init_row_height(self):
    """Apply one fixed height to every row of the current model.

    The height is four times the line height reported by the font metrics
    of this widget's font.
    """
    line_height = QFontMetrics(self.font()).height()
    row_height = 4 * line_height

    row_count = self.model().rowCount()
    for row_index in range(row_count):
        self.setRowHeight(row_index, row_height)
def has_non_empty_translation(self) -> bool:
for i in range(self.model().rowCount()):
if self.model().record(i).value("translation").strip():

View file

@ -1,10 +1,9 @@
import logging
import platform
from typing import Optional
from uuid import UUID
from PyQt6.QtCore import Qt, QThread
from PyQt6.QtGui import QFont
from PyQt6.QtCore import Qt, QThread, pyqtSignal
from PyQt6.QtGui import QFont, QShowEvent
from PyQt6.QtMultimedia import QMediaPlayer
from PyQt6.QtSql import QSqlRecord
from PyQt6.QtWidgets import (
@ -13,8 +12,13 @@ from PyQt6.QtWidgets import (
QToolButton,
QLabel,
QMessageBox,
QInputDialog,
QDialogButtonBox,
)
import srt
from srt_equalizer import srt_equalizer
from buzz.locale import _
from buzz.db.entity.transcription import Transcription
from buzz.db.service.transcription_service import TranscriptionService
@ -25,12 +29,13 @@ from buzz.store.keyring_store import get_password, Key
from buzz.widgets.audio_player import AudioPlayer
from buzz.widgets.icon import (
FileDownloadIcon,
TranslateIcon
TranslateIcon,
ResizeIcon,
)
from buzz.translator import Translator
from buzz.widgets.text_display_box import TextDisplayBox
from buzz.widgets.toolbar import ToolBar
from buzz.transcriber.transcriber import TranscriptionOptions
from buzz.transcriber.transcriber import TranscriptionOptions, Segment
from buzz.widgets.transcriber.advanced_settings_dialog import AdvancedSettingsDialog
from buzz.widgets.transcription_viewer.export_transcription_menu import (
ExportTranscriptionMenu,
@ -47,7 +52,18 @@ from buzz.widgets.transcription_viewer.transcription_view_mode_tool_button impor
)
class OkEnabledInputDialog(QInputDialog):
    """Input dialog that enables its Ok button every time it is shown."""

    def showEvent(self, event: QShowEvent) -> None:
        """Run the default show handling, then enable the Ok button.

        Nothing happens beyond the default handling when the dialog has no
        button box child or the button box has no Ok button.
        """
        super().showEvent(event)

        buttons = self.findChild(QDialogButtonBox)
        if not buttons:
            return

        ok = buttons.button(QDialogButtonBox.StandardButton.Ok)
        if not ok:
            return

        ok.setEnabled(True)
class TranscriptionViewerWidget(QWidget):
resize_button_clicked = pyqtSignal()
transcription: Transcription
settings = Settings()
@ -160,6 +176,17 @@ class TranscriptionViewerWidget(QWidget):
toolbar.addWidget(translate_button)
resize_button = QToolButton()
resize_button.setText(_("Resize"))
resize_button.setObjectName("resize_button")
resize_button.setIcon(ResizeIcon(self))
resize_button.setToolButtonStyle(
Qt.ToolButtonStyle.ToolButtonTextBesideIcon
)
resize_button.clicked.connect(self.on_resize_button_clicked)
toolbar.addWidget(resize_button)
layout.setMenuBar(toolbar)
layout.addWidget(self.table_widget)
@ -255,6 +282,63 @@ class TranscriptionViewerWidget(QWidget):
for segment in segments:
self.translator.enqueue(segment.value("text"), segment.value("id"))
def on_resize_button_clicked(self):
    """Ask for a target subtitle length and re-split the segments to fit it.

    An integer input dialog (default 42, range 1-100) asks for the target
    number of characters per subtitle. If the dialog is not accepted the
    method returns without touching anything. Otherwise every segment of
    the table is split with ``srt_equalizer`` using the "punctuation"
    method, the stored segments of the transcription are replaced with the
    result, and the table is refreshed.
    """
    dialog = OkEnabledInputDialog(self)
    dialog.setOkButtonText(_("Ok"))
    dialog.setCancelButtonText(_("Cancel"))
    dialog.setWindowTitle(_("Desired subtitle length"))
    dialog.setLabelText(_("Enter target characters per subtitle:"))
    dialog.setIntValue(42)
    dialog.setIntMaximum(100)
    dialog.setIntMinimum(1)
    dialog.setIntStep(1)
    dialog.setInputMode(QInputDialog.InputMode.IntInput)

    if dialog.exec() != QInputDialog.DialogCode.Accepted:
        return
    target_chars = dialog.intValue()

    # Wrap each table row in an srt.Subtitle so srt_equalizer can split it.
    subtitles = [
        srt.Subtitle(
            index=row.value("id"),
            start=row.value("start_time"),
            end=row.value("end_time"),
            content=row.value("text"),
        )
        for row in self.table_widget.segments()
    ]

    # Limit each subtitle to a maximum character length, splitting into
    # multiple subtitle items if necessary. The index of the last piece is
    # carried over as the starting index for the next split.
    resized = []
    start_index = 0
    for subtitle in subtitles:
        pieces = srt_equalizer.split_subtitle(
            sub=subtitle,
            target_chars=target_chars,
            start_from_index=start_index,
            method="punctuation",
        )
        resized.extend(pieces)
        start_index = pieces[-1].index

    # Pieces whose rounded start and end coincide are dropped.
    new_segments = []
    for piece in resized:
        start, end = round(piece.start), round(piece.end)
        if start == end:
            continue
        new_segments.append(Segment(start, end, piece.content))

    self.transcription_service.replace_transcription_segments(
        UUID(hex=self.transcription.id),
        new_segments,
    )

    # Re-run the model's select() and re-apply the row heights afterwards.
    self.table_widget.model().select()
    self.table_widget.init_row_height()
def closeEvent(self, event):
self.hide()

26
poetry.lock generated
View file

@ -2682,6 +2682,30 @@ CFFI = ">=1.0"
[package.extras]
numpy = ["NumPy"]
[[package]]
name = "srt"
version = "3.5.3"
description = "A tiny library for parsing, modifying, and composing SRT files."
optional = false
python-versions = ">=2.7"
files = [
{file = "srt-3.5.3.tar.gz", hash = "sha256:4884315043a4f0740fd1f878ed6caa376ac06d70e135f306a6dc44632eed0cc0"},
]
[[package]]
name = "srt-equalizer"
version = "0.1.10"
description = "Transform subtitle line lengths, splitting into multiple subtitle fragments if necessary. "
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "srt_equalizer-0.1.10-py3-none-any.whl", hash = "sha256:7b0cca73374ff7351badf2dc5266cabefdfd36b91ff736a535445562036cf8ed"},
{file = "srt_equalizer-0.1.10.tar.gz", hash = "sha256:5f6b1b2c48b12bb1caab13822d7ddd0a54a87772b824a0aa2ba64c033d379353"},
]
[package.dependencies]
srt = ">=3.5.3,<4.0.0"
[[package]]
name = "stable-ts"
version = "2.15.9"
@ -3400,4 +3424,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.9,<3.13"
content-hash = "c62d3e0a79ed56cccbbd28548655a072ca9e144fc5700260f788f1b88057faa7"
content-hash = "3997325c46fdc5fe89d1467778f846a19eced146d7879c3ae3658ccdc8861ada"

View file

@ -36,6 +36,7 @@ faster-whisper = "1.0.1"
openai-whisper = "v20231117"
transformers = "4.39.1"
polib = "^1.2.0"
srt-equalizer = "^0.1.10"
[tool.poetry.group.dev.dependencies]
autopep8 = "^1.7.0"

View file

@ -1,8 +1,8 @@
import uuid
import time
import pytest
from pytestqt.qtbot import QtBot
from unittest.mock import patch
from buzz.locale import _
from buzz.db.entity.transcription import Transcription
@ -13,6 +13,11 @@ from buzz.widgets.transcription_viewer.transcription_view_mode_tool_button impor
TranscriptionViewModeToolButton,
ViewMode
)
from PyQt6.QtCore import Qt
from PyQt6.QtWidgets import (
QToolButton,
QInputDialog,
)
from buzz.widgets.transcription_viewer.transcription_segments_editor_widget import (
TranscriptionSegmentsEditorWidget,
)
@ -77,6 +82,40 @@ class TestTranscriptionViewerWidget:
editor.model().setData(editor.model().index(0, 3), "Biens")
widget.close()
@patch('buzz.widgets.transcription_viewer.transcription_viewer_widget.OkEnabledInputDialog')
def test_should_resize_segment_text(self, mock_dialog, qtbot, transcription, transcription_service, shortcuts):
    """Clicking the resize button re-splits segments to the chosen length.

    The input dialog is patched so it reports "accepted" with a target of
    5 characters. The second stored segment ("venue dans") is expected to
    end up as separate "venue" and "dans" rows after the resize.
    """
    dialog = mock_dialog.return_value
    dialog.exec.return_value = QInputDialog.DialogCode.Accepted
    dialog.intValue.return_value = 5

    widget = TranscriptionViewerWidget(
        transcription, transcription_service, shortcuts
    )
    qtbot.add_widget(widget)

    editor = widget.findChild(TranscriptionSegmentsEditorWidget)

    def row_values(row):
        # Values of model columns 1, 2 and 3 for the given row.
        model = editor.model()
        return tuple(model.index(row, column).data() for column in (1, 2, 3))

    assert row_values(1) == (329, 299, "venue dans")

    resize_button = widget.findChild(QToolButton, "resize_button")
    with qtbot.waitSignal(widget.resize_button_clicked, timeout=1000):
        qtbot.mouseClick(resize_button, Qt.MouseButton.LeftButton)
        # The signal is emitted by hand here so the waitSignal block completes.
        widget.resize_button_clicked.emit()

    assert row_values(0) == (299, 40, "Bien")
    assert row_values(1) == (314, 299, "venue")
    assert row_values(2) == (329, 314, "dans")

    widget.close()
def test_text_button_changes_view_mode(
self, qtbot, transcription, transcription_service, shortcuts
):