buzz/hatch_build.py
2026-02-20 15:47:13 +02:00

227 lines
10 KiB
Python

"""Custom build hook for hatchling to build whisper.cpp binaries."""
import glob
import subprocess
import sys
from pathlib import Path
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
class CustomBuildHook(BuildHookInterface):
"""Build hook to compile whisper.cpp before building the package."""
def initialize(self, version, build_data):
"""Run make buzz/whisper_cpp before building."""
print("Running 'make buzz/whisper_cpp' to build whisper.cpp binaries...")
# Mark wheel as platform-specific since we're including compiled binaries
# But set tag to py3-none since binaries are standalone (no Python C extensions)
if version == "standard": # Only for wheel builds
import platform
build_data["pure_python"] = False
# Determine the platform tag based on current OS and architecture
system = platform.system().lower()
machine = platform.machine().lower()
if system == "linux":
if machine in ("x86_64", "amd64"):
tag = "py3-none-manylinux_2_34_x86_64"
else:
raise ValueError(f"Unsupported Linux architecture: {machine}. Only x86_64 is supported.")
elif system == "darwin":
if machine in ("x86_64", "amd64"):
tag = "py3-none-macosx_10_9_x86_64"
elif machine in ("arm64", "aarch64"):
tag = "py3-none-macosx_11_0_arm64"
else:
raise ValueError(f"Unsupported macOS architecture: {machine}")
elif system == "windows":
if machine in ("x86_64", "amd64"):
tag = "py3-none-win_amd64"
else:
raise ValueError(f"Unsupported Windows architecture: {machine}. Only x86_64 is supported.")
else:
raise ValueError(f"Unsupported operating system: {system}")
if tag:
build_data["tag"] = tag
print(f"Building wheel with tag: {tag}")
# Get the project root directory
project_root = Path(self.root)
try:
# Run the make command
result = subprocess.run(
["make", "buzz/whisper_cpp"],
cwd=project_root,
check=True,
capture_output=True,
text=True
)
print(result.stdout)
if result.stderr:
print(result.stderr, file=sys.stderr)
print("Successfully built whisper.cpp binaries")
# Run the make command for translation files
result = subprocess.run(
["make", "translation_mo"],
cwd=project_root,
check=True,
capture_output=True,
text=True
)
print(result.stdout)
if result.stderr:
print(result.stderr, file=sys.stderr)
print("Successfully compiled translation files")
# Build ctc_forced_aligner C++ extension in-place
print("Building ctc_forced_aligner C++ extension...")
ctc_aligner_dir = project_root / "ctc_forced_aligner"
# Apply local patches before building.
# Uses --check first to avoid touching the working tree unnecessarily,
# which is safer in a detached-HEAD submodule.
patches_dir = project_root / "patches"
for patch_file in sorted(patches_dir.glob("ctc_forced_aligner_*.patch")):
# Dry-run forward: succeeds only if patch is NOT yet applied.
check_forward = subprocess.run(
["git", "apply", "--check", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
capture_output=True,
text=True,
)
if check_forward.returncode == 0:
# Patch can be applied — do it for real.
subprocess.run(
["git", "apply", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
check=True,
capture_output=True,
text=True,
)
print(f"Applied patch: {patch_file.name}")
else:
# Dry-run failed — either already applied or genuinely broken.
check_reverse = subprocess.run(
["git", "apply", "--check", "--reverse", "--ignore-whitespace", str(patch_file)],
cwd=ctc_aligner_dir,
capture_output=True,
text=True,
)
if check_reverse.returncode == 0:
print(f"Patch already applied (skipping): {patch_file.name}")
else:
print(f"WARNING: could not apply patch {patch_file.name}: {check_forward.stderr}", file=sys.stderr)
result = subprocess.run(
[sys.executable, "setup.py", "build_ext", "--inplace"],
cwd=ctc_aligner_dir,
check=True,
capture_output=True,
text=True
)
print(result.stdout)
if result.stderr:
print(result.stderr, file=sys.stderr)
print("Successfully built ctc_forced_aligner C++ extension")
# Force include all files in buzz/whisper_cpp directory
whisper_cpp_dir = project_root / "buzz" / "whisper_cpp"
if whisper_cpp_dir.exists():
# Get all files in the whisper_cpp directory
whisper_files = glob.glob(str(whisper_cpp_dir / "**" / "*"), recursive=True)
# Filter only files (not directories)
whisper_files = [f for f in whisper_files if Path(f).is_file()]
# Add them to force_include
if 'force_include' not in build_data:
build_data['force_include'] = {}
for file_path in whisper_files:
# Convert to relative path from project root
rel_path = Path(file_path).relative_to(project_root)
build_data['force_include'][str(rel_path)] = str(rel_path)
print(f"Force including {len(whisper_files)} files from buzz/whisper_cpp/")
else:
print(f"Warning: {whisper_cpp_dir} does not exist after build", file=sys.stderr)
# Force include demucs package at top level (demucs_repo/demucs -> demucs/)
demucs_pkg_dir = project_root / "demucs_repo" / "demucs"
if demucs_pkg_dir.exists():
# Get all files in the demucs package directory
demucs_files = glob.glob(str(demucs_pkg_dir / "**" / "*"), recursive=True)
# Filter only files (not directories)
demucs_files = [f for f in demucs_files if Path(f).is_file()]
# Add them to force_include, mapping to top-level demucs/
if 'force_include' not in build_data:
build_data['force_include'] = {}
for file_path in demucs_files:
# Convert to relative path from demucs package dir
rel_from_pkg = Path(file_path).relative_to(demucs_pkg_dir)
# Target path is demucs/<relative_path>
target_path = Path("demucs") / rel_from_pkg
build_data['force_include'][str(file_path)] = str(target_path)
print(f"Force including {len(demucs_files)} files from demucs_repo/demucs/ -> demucs/")
else:
print(f"Warning: {demucs_pkg_dir} does not exist", file=sys.stderr)
# Force include all .mo files from buzz/locale directory
locale_dir = project_root / "buzz" / "locale"
if locale_dir.exists():
# Get all .mo files in the locale directory
locale_files = glob.glob(str(locale_dir / "**" / "*.mo"), recursive=True)
# Add them to force_include
if 'force_include' not in build_data:
build_data['force_include'] = {}
for file_path in locale_files:
# Convert to relative path from project root
rel_path = Path(file_path).relative_to(project_root)
build_data['force_include'][str(rel_path)] = str(rel_path)
print(f"Force including {len(locale_files)} .mo files from buzz/locale/")
else:
print(f"Warning: {locale_dir} does not exist", file=sys.stderr)
# Force include compiled extensions from ctc_forced_aligner
ctc_aligner_pkg = project_root / "ctc_forced_aligner" / "ctc_forced_aligner"
if ctc_aligner_pkg.exists():
# Get all compiled extension files (.so, .pyd, .dll)
extension_patterns = ["*.so", "*.pyd", "*.dll"]
extension_files = []
for pattern in extension_patterns:
extension_files.extend(glob.glob(str(ctc_aligner_pkg / pattern)))
# Add them to force_include
if 'force_include' not in build_data:
build_data['force_include'] = {}
for file_path in extension_files:
# Convert to relative path from project root
rel_path = Path(file_path).relative_to(project_root)
build_data['force_include'][str(rel_path)] = str(rel_path)
print(f"Force including {len(extension_files)} compiled extension(s) from ctc_forced_aligner/")
else:
print(f"Warning: {ctc_aligner_pkg} does not exist", file=sys.stderr)
except subprocess.CalledProcessError as e:
print(f"Error building whisper.cpp: {e}", file=sys.stderr)
print(f"stdout: {e.stdout}", file=sys.stderr)
print(f"stderr: {e.stderr}", file=sys.stderr)
sys.exit(1)
except FileNotFoundError:
print("Error: 'make' command not found. Please ensure make is installed.", file=sys.stderr)
sys.exit(1)