1292 fix speech dependencies (#1302)

This commit is contained in:
Raivis Dejus 2025-12-06 18:51:40 +02:00 committed by GitHub
commit 11e59dba2b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 61 additions and 1 deletion

1
.gitignore vendored
View file

@ -11,6 +11,7 @@ coverage.xml
.idea/
.venv/
venv/
.claude/
# whisper_cpp
whisper_cpp

View file

@ -22,6 +22,19 @@ datas += copy_metadata("tokenizers")
# Distribution metadata to bundle, collected in this exact order. Each name is
# handed to copy_metadata() and the result is appended to ``datas``.
_EXTRA_METADATA_PACKAGES = (
    "huggingface-hub",
    "safetensors",
    "pyyaml",
    "julius",
    "openunmix",
    "lameenc",
    "diffq",
    "einops",
    "hydra-core",
    "hydra-colorlog",
    "museval",
    "submitit",
    "treetable",
    "soundfile",
    "dora-search",
    "lhotse",
)
for _metadata_package in _EXTRA_METADATA_PACKAGES:
    datas += copy_metadata(_metadata_package)
# Allow transformers package to load __init__.py file dynamically:
# https://github.com/chidiwilliams/buzz/issues/272
@ -92,7 +105,22 @@ a = Analysis(
pathex=[],
binaries=binaries,
datas=datas,
hiddenimports=[],
hiddenimports=[
"dora", "dora.log",
"julius", "julius.core", "julius.resample",
"openunmix", "openunmix.filtering",
"lameenc",
"diffq",
"einops",
"hydra", "hydra.core", "hydra.core.global_hydra",
"hydra_colorlog",
"museval",
"submitit",
"treetable",
"soundfile",
"_soundfile_data",
"lhotse",
],
hookspath=[],
hooksconfig={},
runtime_hooks=[],

View file

@ -1,12 +1,42 @@
import logging
import multiprocessing
import queue
import sys
from pathlib import Path
from typing import Optional, Tuple, List, Set
from uuid import UUID
from PyQt6.QtCore import QObject, QThread, pyqtSignal, pyqtSlot
# Patch subprocess for demucs to prevent console windows on Windows.
# NOTE(review): this sits ahead of the demucs import so that any names demucs
# binds from ``subprocess`` at import time would already be the patched ones —
# confirm against how demucs actually invokes subprocess.
if sys.platform == "win32":
    import functools
    import subprocess

    def _with_hidden_console(original):
        """Return a wrapper around *original* that hides the child's console window.

        The wrapper fills in ``startupinfo`` and ``creationflags`` only when
        the caller did not pass them, then forwards every argument to
        *original* unchanged. ``functools.wraps`` keeps the wrapped
        function's name, docstring and signature visible on the wrapper.
        """

        @functools.wraps(original)
        def wrapper(*args, **kwargs):
            # Respect an explicit startupinfo (even ``None``) from the caller.
            if 'startupinfo' not in kwargs:
                si = subprocess.STARTUPINFO()
                si.dwFlags |= subprocess.STARTF_USESHOWWINDOW
                si.wShowWindow = subprocess.SW_HIDE
                kwargs['startupinfo'] = si
            # Same rule for creationflags: only supply a default.
            kwargs.setdefault('creationflags', subprocess.CREATE_NO_WINDOW)
            return original(*args, **kwargs)

        return wrapper

    # Keep references to the unpatched functions under their original names.
    _original_run = subprocess.run
    _original_check_output = subprocess.check_output

    _patched_run = _with_hidden_console(_original_run)
    _patched_check_output = _with_hidden_console(_original_check_output)

    subprocess.run = _patched_run
    subprocess.check_output = _patched_check_output
from demucs import api as demucsApi
from buzz.model_loader import ModelType
@ -95,6 +125,7 @@ class FileTranscriberQueueWorker(QObject):
logging.error(f"Error during speech extraction: {e}", exc_info=True)
logging.debug("Starting next transcription task")
self.task_progress.emit(self.current_task, 0)
model_type = self.current_task.transcription_options.model.model_type
if model_type == ModelType.OPEN_AI_WHISPER_API: