mirror of
https://github.com/chidiwilliams/buzz.git
synced 2026-03-14 22:55:46 +01:00
227 lines
10 KiB
Python
227 lines
10 KiB
Python
"""Custom build hook for hatchling to build whisper.cpp binaries."""
|
|
import glob
|
|
import subprocess
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
|
|
|
|
|
|
class CustomBuildHook(BuildHookInterface):
    """Build hook that prepares native artefacts before the package is built.

    ``initialize`` performs, in order:

    1. (wheel builds only) marks the wheel as platform-specific and sets its tag;
    2. ``make buzz/whisper_cpp`` and ``make translation_mo`` in the project root;
    3. applies ``patches/ctc_forced_aligner_*.patch`` to ``ctc_forced_aligner``
       and builds its extension in place with ``setup.py build_ext --inplace``;
    4. registers the produced files in ``build_data['force_include']``.
    """

    @staticmethod
    def _wheel_tag():
        """Return the wheel tag for the current OS and CPU architecture.

        Raises:
            ValueError: if the OS/architecture combination is not supported.
        """
        import platform

        system = platform.system().lower()
        machine = platform.machine().lower()
        is_x86_64 = machine in ("x86_64", "amd64")

        if system == "linux":
            if is_x86_64:
                return "py3-none-manylinux_2_34_x86_64"
            raise ValueError(f"Unsupported Linux architecture: {machine}. Only x86_64 is supported.")
        if system == "darwin":
            if is_x86_64:
                return "py3-none-macosx_10_9_x86_64"
            if machine in ("arm64", "aarch64"):
                return "py3-none-macosx_11_0_arm64"
            raise ValueError(f"Unsupported macOS architecture: {machine}")
        if system == "windows":
            if is_x86_64:
                return "py3-none-win_amd64"
            raise ValueError(f"Unsupported Windows architecture: {machine}. Only x86_64 is supported.")
        raise ValueError(f"Unsupported operating system: {system}")

    @staticmethod
    def _run_step(command, cwd):
        """Run *command* in *cwd* and echo its captured stdout/stderr.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
            FileNotFoundError: if the executable (or *cwd*) cannot be found.
        """
        result = subprocess.run(
            command,
            cwd=cwd,
            check=True,
            capture_output=True,
            text=True,
        )
        print(result.stdout)
        if result.stderr:
            print(result.stderr, file=sys.stderr)

    @staticmethod
    def _apply_patches(patches_dir, target_dir):
        """Apply every ``ctc_forced_aligner_*.patch`` in *patches_dir* to *target_dir*.

        Each patch is dry-run with ``git apply --check`` first so the working
        tree is only touched when the patch applies cleanly. A patch that
        fails the forward check but passes the reverse check is treated as
        already applied. A patch that fails both checks only produces a
        warning; the build continues.
        """
        for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
            # Dry-run forward: succeeds only if the patch is NOT yet applied.
            check_forward = subprocess.run(
                ["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
                cwd=target_dir,
                capture_output=True,
                text=True,
            )
            if check_forward.returncode == 0:
                # Patch can be applied, so do it for real.
                subprocess.run(
                    ["git", "apply", "--ignore-whitespace", str(patch_file)],
                    cwd=target_dir,
                    check=True,
                    capture_output=True,
                    text=True,
                )
                print(f"Applied patch: {patch_file.name}")
                continue

            # Dry-run failed: either already applied or genuinely broken.
            check_reverse = subprocess.run(
                ["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
                cwd=target_dir,
                capture_output=True,
                text=True,
            )
            if check_reverse.returncode == 0:
                print(f"Patch already applied (skipping): {patch_file.name}")
            else:
                print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)

    @staticmethod
    def _find_files(directory, patterns, recursive):
        """Return the sorted regular files under *directory* matching *patterns*.

        The directory part is passed through ``glob.escape`` so that glob
        metacharacters in the checkout path (``[``, ``]``, ``*``, ``?``) are
        matched literally instead of being interpreted as a pattern.
        """
        base = Path(glob.escape(str(directory)))
        if recursive:
            base = base / "**"

        matches = []
        for pattern in patterns:
            matches.extend(glob.glob(str(base / pattern), recursive=recursive))

        # Sorted so the include order is deterministic across runs.
        return sorted(match for match in matches if Path(match).is_file())

    @staticmethod
    def _include_in_place(build_data, project_root, files):
        """Force-include *files* at the same path they have relative to *project_root*."""
        force_include = build_data.setdefault('force_include', {})
        for file_path in files:
            rel_path = str(Path(file_path).relative_to(project_root))
            force_include[rel_path] = rel_path

    def initialize(self, version, build_data):
        """Build the native artefacts and register them in *build_data*.

        Args:
            version: build version string; ``"standard"`` triggers the
                platform-specific wheel tagging.
            build_data: mutable build-data mapping; ``pure_python``, ``tag``
                and ``force_include`` are written here.

        Raises:
            ValueError: if the current OS/architecture is unsupported
                (wheel builds only).
            SystemExit: with status 1 if a build command fails or a required
                executable/directory is missing.
        """
        print("Running 'make buzz/whisper_cpp' to build whisper.cpp binaries...")

        # Mark wheel as platform-specific since we're including compiled binaries,
        # but set tag to py3-none since binaries are standalone (no Python C extensions).
        if version == "standard":  # Only for wheel builds
            build_data["pure_python"] = False
            tag = self._wheel_tag()
            build_data["tag"] = tag
            print(f"Building wheel with tag: {tag}")

        project_root = Path(self.root)

        try:
            self._run_step(["make", "buzz/whisper_cpp"], project_root)
            print("Successfully built whisper.cpp binaries")

            self._run_step(["make", "translation_mo"], project_root)
            print("Successfully compiled translation files")

            # Build ctc_forced_aligner C++ extension in-place
            print("Building ctc_forced_aligner C++ extension...")
            ctc_aligner_dir = project_root / "ctc_forced_aligner"

            # Apply local patches before building.
            self._apply_patches(project_root / "patches", ctc_aligner_dir)

            self._run_step(
                [sys.executable, "setup.py", "build_ext", "--inplace"],
                ctc_aligner_dir,
            )
            print("Successfully built ctc_forced_aligner C++ extension")

            # Force include all files in buzz/whisper_cpp directory
            whisper_cpp_dir = project_root / "buzz" / "whisper_cpp"
            if whisper_cpp_dir.exists():
                whisper_files = self._find_files(whisper_cpp_dir, ["*"], recursive=True)
                self._include_in_place(build_data, project_root, whisper_files)
                print(f"Force including {len(whisper_files)} files from buzz/whisper_cpp/")
            else:
                print(f"Warning: {whisper_cpp_dir} does not exist after build", file=sys.stderr)

            # Force include demucs package at top level (demucs_repo/demucs -> demucs/)
            demucs_pkg_dir = project_root / "demucs_repo" / "demucs"
            if demucs_pkg_dir.exists():
                demucs_files = self._find_files(demucs_pkg_dir, ["*"], recursive=True)
                force_include = build_data.setdefault('force_include', {})
                for file_path in demucs_files:
                    # Source is keyed by its full path; target is demucs/<relative_path>.
                    rel_from_pkg = Path(file_path).relative_to(demucs_pkg_dir)
                    force_include[str(file_path)] = str(Path("demucs") / rel_from_pkg)
                print(f"Force including {len(demucs_files)} files from demucs_repo/demucs/ -> demucs/")
            else:
                print(f"Warning: {demucs_pkg_dir} does not exist", file=sys.stderr)

            # Force include all .mo files from buzz/locale directory
            locale_dir = project_root / "buzz" / "locale"
            if locale_dir.exists():
                locale_files = self._find_files(locale_dir, ["*.mo"], recursive=True)
                self._include_in_place(build_data, project_root, locale_files)
                print(f"Force including {len(locale_files)} .mo files from buzz/locale/")
            else:
                print(f"Warning: {locale_dir} does not exist", file=sys.stderr)

            # Force include compiled extensions from ctc_forced_aligner
            ctc_aligner_pkg = project_root / "ctc_forced_aligner" / "ctc_forced_aligner"
            if ctc_aligner_pkg.exists():
                extension_files = self._find_files(
                    ctc_aligner_pkg, ["*.so", "*.pyd", "*.dll"], recursive=False
                )
                self._include_in_place(build_data, project_root, extension_files)
                print(f"Force including {len(extension_files)} compiled extension(s) from ctc_forced_aligner/")
            else:
                print(f"Warning: {ctc_aligner_pkg} does not exist", file=sys.stderr)

        except subprocess.CalledProcessError as e:
            # Any of the make/git/setup.py steps may have failed; str(e)
            # names the exact command and its exit status.
            print(f"Error running build step: {e}", file=sys.stderr)
            print(f"stdout: {e.stdout}", file=sys.stderr)
            print(f"stderr: {e.stderr}", file=sys.stderr)
            sys.exit(1)
        except FileNotFoundError as e:
            # Raised when an executable (make, git, the interpreter) or a
            # working directory passed as cwd is missing; report which one
            # when the platform provides it.
            missing = e.filename or e
            print(
                f"Error: required command or directory not found: {missing}. "
                "Please ensure 'make' and 'git' are installed and that the "
                "ctc_forced_aligner directory exists.",
                file=sys.stderr,
            )
            sys.exit(1)
|