From dc0dc6b3d2abc66971433ae606b70e7b9df5026d Mon Sep 17 00:00:00 2001 From: Raivis Dejus Date: Sat, 13 Dec 2025 08:05:55 +0200 Subject: [PATCH] Adding speech extraction option to CLI (#1311) --- buzz/cli.py | 6 ++++++ docs/docs/cli.md | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/buzz/cli.py b/buzz/cli.py index b054e440..6fd56df0 100644 --- a/buzz/cli.py +++ b/buzz/cli.py @@ -102,6 +102,9 @@ def parse(app: Application, parser: QCommandLineParser): word_timestamp_option = QCommandLineOption( ["w", "word-timestamps"], "Generate word-level timestamps." ) + extract_speech_option = QCommandLineOption( + ["e", "extract-speech"], "Extract speech from audio before transcribing." + ) open_ai_access_token_option = QCommandLineOption( "openai-token", f"OpenAI access token. Use only when --model-type is {CommandLineModelType.OPEN_AI_WHISPER_API.value}. Defaults to your previously saved access token, if one exists.", @@ -124,6 +127,7 @@ def parse(app: Application, parser: QCommandLineParser): language_option, initial_prompt_option, word_timestamp_option, + extract_speech_option, open_ai_access_token_option, output_directory_option, srt_option, @@ -178,6 +182,7 @@ def parse(app: Application, parser: QCommandLineParser): initial_prompt = parser.value(initial_prompt_option) word_timestamps = parser.isSet(word_timestamp_option) + extract_speech = parser.isSet(extract_speech_option) output_formats: typing.Set[OutputFormat] = set() if parser.isSet(srt_option): @@ -205,6 +210,7 @@ def parse(app: Application, parser: QCommandLineParser): language=language, initial_prompt=initial_prompt, word_level_timings=word_timestamps, + extract_speech=extract_speech, openai_access_token=openai_access_token, ) diff --git a/docs/docs/cli.md b/docs/docs/cli.md index 751ed097..a8df135a 100644 --- a/docs/docs/cli.md +++ b/docs/docs/cli.md @@ -60,7 +60,8 @@ Options: (Yiddish), yo (Yoruba), zh (Chinese). Leave empty to detect language. -p, --prompt Initial prompt. - -w, --word-timestamps Generate word-level timestamps. (available since 1.2.0) + -w, --word-timestamps Generate word-level timestamps. (available since 1.2.0) + -e, --extract-speech Extract speech from audio before transcribing. (available since 1.3.0) --openai-token OpenAI access token. Use only when --model-type is openaiapi. Defaults to your previously saved access token, if one exists.