From 734bd99d17c53b97245ca3b9e0bf37e8bf75926d Mon Sep 17 00:00:00 2001
From: Raivis Dejus
Date: Sun, 21 Dec 2025 20:02:39 +0200
Subject: [PATCH] 978 add youtube title (#1321)

---
Review notes (not part of the commit; free-form text between the separator
above and the first file diff is ignored when the patch is applied):

* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Every hunk's line counts and the diffstat totals agree
  with the reconstruction, but leading whitespace on context lines is
  inferred. If a hunk is rejected, retry with whitespace-insensitive matching.
* The From: header carries no e-mail address in this copy.
* file_transcriber.py: the filename clean-up loop runs outside the try block,
  so a title that comes back as None (key present, value None) would
  presumably fail at .replace(). An empty or very long title is also used as
  the file name unchanged. TODO confirm.
* file_transcriber.py: nothing in this hunk removes the directory created by
  tempfile.mkdtemp(); verify that cleanup happens elsewhere.
* file_transcriber.py: the video info is extracted once for the title before
  the download options are built, so the URL is presumably queried twice.
* main_window.py: the added lines use os.path and logging; the imports are
  not visible in this hunk.

 buzz/db/dao/transcription_dao.py              | 16 ++++++++++
 buzz/db/service/transcription_service.py      |  3 ++
 buzz/transcriber/file_transcriber.py          | 29 +++++++++++++++++--
 buzz/widgets/main_window.py                   |  9 ++++++
 .../transcription_tasks_table_widget.py       |  4 +--
 .../transcription_tasks_table_widget_test.py  | 14 ++++-----
 6 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/buzz/db/dao/transcription_dao.py b/buzz/db/dao/transcription_dao.py
index 928099c3..db5107b4 100644
--- a/buzz/db/dao/transcription_dao.py
+++ b/buzz/db/dao/transcription_dao.py
@@ -250,6 +250,22 @@ class TranscriptionDAO(DAO[Transcription]):
         if not query.exec():
             raise Exception(query.lastError().text())
 
+    def update_transcription_file_and_name(self, id: UUID, file_path: str, name: str | None = None):
+        query = self._create_query()
+        query.prepare(
+            """
+            UPDATE transcription
+            SET file = :file, name = COALESCE(:name, name)
+            WHERE id = :id
+        """
+        )
+
+        query.bindValue(":id", str(id))
+        query.bindValue(":file", file_path)
+        query.bindValue(":name", name)
+        if not query.exec():
+            raise Exception(query.lastError().text())
+
     def update_transcription_name(self, id: UUID, name: str):
         query = self._create_query()
         query.prepare(
diff --git a/buzz/db/service/transcription_service.py b/buzz/db/service/transcription_service.py
index 4c500800..8a15a24e 100644
--- a/buzz/db/service/transcription_service.py
+++ b/buzz/db/service/transcription_service.py
@@ -47,6 +47,9 @@ class TranscriptionService:
             )
         )
 
+    def update_transcription_file_and_name(self, id: UUID, file_path: str, name: str | None = None):
+        self.transcription_dao.update_transcription_file_and_name(id, file_path, name)
+
     def update_transcription_name(self, id: UUID, name: str):
         self.transcription_dao.update_transcription_name(id, name)
 
diff --git a/buzz/transcriber/file_transcriber.py b/buzz/transcriber/file_transcriber.py
index 5943c8a0..250c27b6 100755
--- a/buzz/transcriber/file_transcriber.py
+++ b/buzz/transcriber/file_transcriber.py
@@ -38,12 +38,35 @@ class FileTranscriber(QObject):
     @pyqtSlot()
     def run(self):
         if self.transcription_task.source == FileTranscriptionTask.Source.URL_IMPORT:
-            temp_output_path = tempfile.mktemp()
+            cookiefile = os.getenv("BUZZ_DOWNLOAD_COOKIEFILE")
+
+            # First extract info to get the video title
+            extract_options = {
+                "logger": logging.getLogger(),
+            }
+            if cookiefile:
+                extract_options["cookiefile"] = cookiefile
+
+            try:
+                with YoutubeDL(extract_options) as ydl_info:
+                    info = ydl_info.extract_info(self.transcription_task.url, download=False)
+                    video_title = info.get("title", "audio")
+            except Exception as exc:
+                logging.debug(f"Error extracting video info: {exc}")
+                video_title = "audio"
+
+            # Sanitize title for use as filename
+            video_title = YoutubeDL.sanitize_info({"title": video_title})["title"]
+            # Remove characters that are problematic in filenames
+            for char in ['/', '\\', ':', '*', '?', '"', '<', '>', '|']:
+                video_title = video_title.replace(char, '_')
+
+            # Create temp directory and use video title as filename
+            temp_dir = tempfile.mkdtemp()
+            temp_output_path = os.path.join(temp_dir, video_title)
             wav_file = temp_output_path + ".wav"
             wav_file = str(Path(wav_file).resolve())
 
-            cookiefile = os.getenv("BUZZ_DOWNLOAD_COOKIEFILE")
-
             options = {
                 "format": "bestaudio/best",
                 "progress_hooks": [self.on_download_progress],
diff --git a/buzz/widgets/main_window.py b/buzz/widgets/main_window.py
index 4ddaf990..91f408ae 100644
--- a/buzz/widgets/main_window.py
+++ b/buzz/widgets/main_window.py
@@ -385,6 +385,15 @@ class MainWindow(QMainWindow):
             pass
 
     def on_task_completed(self, task: FileTranscriptionTask, segments: List[Segment]):
+        # Update file path in database (important for URL imports where file is downloaded)
+        if task.file_path:
+            logging.debug(f"Updating transcription file path: {task.file_path}")
+            # For URL imports, use the file basename (video title) as the display name
+            name = None
+            if task.source == FileTranscriptionTask.Source.URL_IMPORT:
+                basename = os.path.basename(task.file_path)
+                name = os.path.splitext(basename)[0]  # Remove .wav extension
+            self.transcription_service.update_transcription_file_and_name(task.uid, task.file_path, name)
         self.transcription_service.update_transcription_as_completed(task.uid, segments)
         self.table_widget.refresh_row(task.uid)
 
diff --git a/buzz/widgets/transcription_tasks_table_widget.py b/buzz/widgets/transcription_tasks_table_widget.py
index 296ea79e..3fe0db7b 100644
--- a/buzz/widgets/transcription_tasks_table_widget.py
+++ b/buzz/widgets/transcription_tasks_table_widget.py
@@ -95,8 +95,8 @@ column_definitions = [
         width=400,
         delegate=RecordDelegate(
             text_getter=lambda record: record.value("name") or (
-                record.value("url") if record.value("url") != ""
-                else os.path.basename(record.value("file"))
+                os.path.basename(record.value("file")) if record.value("file")
+                else record.value("url") or ""
             )
         ),
         hidden_toggleable=False,
diff --git a/tests/widgets/transcription_tasks_table_widget_test.py b/tests/widgets/transcription_tasks_table_widget_test.py
index 785866f1..77bb0514 100644
--- a/tests/widgets/transcription_tasks_table_widget_test.py
+++ b/tests/widgets/transcription_tasks_table_widget_test.py
@@ -286,16 +286,16 @@ class TestTranscriptionTasksTableWidget:
         text = file_column_def.delegate.callback(record_with_name)
         assert text == "Custom Name"
 
-        # Test fallback to URL when no name
-        record_url_fallback = mock_record({"name": None, "url": "http://example.com/audio.mp3", "file": "/path/file.mp3"})
+        # Test fallback to file basename when no name (file takes priority over URL)
+        record_file_fallback = mock_record({"name": None, "url": "http://example.com/audio.mp3", "file": "/path/file.mp3"})
+        text = file_column_def.delegate.callback(record_file_fallback)
+        assert text == "file.mp3"
+
+        # Test fallback to URL when no name and no file
+        record_url_fallback = mock_record({"name": None, "url": "http://example.com/audio.mp3", "file": ""})
         text = file_column_def.delegate.callback(record_url_fallback)
         assert text == "http://example.com/audio.mp3"
 
-        # Test fallback to filename when no name or URL
-        record_file_fallback = mock_record({"name": None, "url": "", "file": "/path/to/audio.mp3"})
-        text = file_column_def.delegate.callback(record_file_fallback)
-        assert text == "audio.mp3"
-
     def test_notes_column_text_getter(self, widget):
         """Test that notes column displays notes or empty string"""
         notes_column_def = next((col for col in column_definitions if col.column == Column.NOTES), None)