2026-05-19-00-11-40 新增Ubuntu配音工作流

2026-05-19 00:22:10 +08:00
parent 6f63ae714c
commit ee8a28da78
12 changed files with 1034 additions and 0 deletions
--- a/Tools_scripts_XunFei-Ubuntu/build_final_video_ubuntu.py
+++ b/Tools_scripts_XunFei-Ubuntu/build_final_video_ubuntu.py
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+"""Build a final voice-over video on Ubuntu with ffmpeg."""
+
+from __future__ import annotations
+
+import argparse
+import shutil
+import subprocess
+from pathlib import Path
+
+
+# Lower-case file suffixes (with the leading dot) that audio_files() accepts
+# as voice-over inputs; suffixes are lower-cased before being compared.
+AUDIO_EXTS = {".mp3", ".wav", ".m4a", ".aac", ".flac", ".ogg"}
+
+
+def run(cmd: list[str]) -> None:
+    """Echo a command to stdout, then execute it.
+
+    The echoed line is shell-quoted so that arguments containing spaces or
+    shell metacharacters (file paths, ffmpeg filter graphs, stream labels)
+    can be copied into a terminal unchanged.
+
+    Args:
+        cmd: Program name followed by its arguments.
+
+    Raises:
+        subprocess.CalledProcessError: If the command exits with a non-zero
+            status.
+    """
+    import shlex  # local import keeps this block self-contained
+
+    print("+ " + " ".join(shlex.quote(part) for part in cmd))
+    subprocess.run(cmd, check=True)
+
+
+def require_tool(name: str) -> str:
+    """Locate an executable on PATH or abort the program.
+
+    Args:
+        name: Executable name to look up.
+
+    Returns:
+        The path reported by ``shutil.which`` for ``name``.
+
+    Raises:
+        SystemExit: If the executable cannot be found; the message carries
+            the apt command for installing ffmpeg.
+    """
+    located = shutil.which(name)
+    if located:
+        return located
+    raise SystemExit(f"{name} is required. Install it with: sudo apt install -y ffmpeg")
+
+
+def media_duration(path: Path) -> float:
+    """Return the container duration of a media file in seconds.
+
+    Runs ``ffprobe`` and parses its ``format=duration`` entry, printed
+    without a key or section wrapper.
+
+    Args:
+        path: Media file to probe.
+
+    Returns:
+        The duration reported by ffprobe, as a float.
+
+    Raises:
+        subprocess.CalledProcessError: If ffprobe exits with a non-zero
+            status.
+        ValueError: If ffprobe does not print a numeric duration (for
+            example an empty line or ``N/A``).
+    """
+    probe_cmd = [
+        "ffprobe",
+        "-v",
+        "error",
+        "-show_entries",
+        "format=duration",
+        "-of",
+        "default=nw=1:nk=1",
+        str(path),
+    ]
+    reported = subprocess.check_output(probe_cmd, text=True).strip()
+    try:
+        return float(reported)
+    except ValueError as err:
+        # Same exception type as before, but naming the file and the raw
+        # value instead of the bare "could not convert string to float".
+        raise ValueError(
+            f"ffprobe reported no usable duration for {path}: {reported!r}"
+        ) from err
+
+
+def audio_files(audio_dir: Path) -> list[Path]:
+    """Collect the audio files directly inside a directory, in sorted order.
+
+    Args:
+        audio_dir: Directory to scan (not recursive).
+
+    Returns:
+        Regular files whose lower-cased suffix is in ``AUDIO_EXTS``, ordered
+        by sorting the directory entries.
+
+    Raises:
+        FileNotFoundError: If no entry qualifies.
+    """
+    matches: list[Path] = []
+    for entry in sorted(audio_dir.iterdir()):
+        if not entry.is_file():
+            continue
+        if entry.suffix.lower() in AUDIO_EXTS:
+            matches.append(entry)
+    if matches:
+        return matches
+    raise FileNotFoundError(f"No audio files found in {audio_dir}")
+
+
+def concat_audio_dir(audio_dir: Path, work_dir: Path, silence: float) -> Path:
+    """Join every audio file of a directory into one WAV voice-over track.
+
+    Each input is converted to 48 kHz stereo 16-bit PCM WAV inside
+    ``work_dir``; the converted files are then concatenated, in the order
+    returned by ``audio_files``, with ffmpeg's concat demuxer. A silent gap
+    can be inserted between consecutive files.
+
+    Args:
+        audio_dir: Directory holding the source audio files.
+        work_dir: Directory for intermediate files; created if missing.
+        silence: Gap in seconds between consecutive files. Values of 0 or
+            less mean no gap.
+
+    Returns:
+        Path of the combined file, ``work_dir / "combined_voice.wav"``.
+
+    Raises:
+        FileNotFoundError: If ``audio_dir`` holds no supported audio file.
+        subprocess.CalledProcessError: If an ffmpeg invocation fails.
+    """
+    # List the inputs before writing anything: an empty directory then fails
+    # before any ffmpeg run, and the silence clip created below cannot be
+    # listed as an input when work_dir and audio_dir are the same directory.
+    sources = audio_files(audio_dir)
+    work_dir.mkdir(parents=True, exist_ok=True)
+
+    ffmpeg_base = ["ffmpeg", "-hide_banner", "-loglevel", "error", "-y"]
+    pcm_codec = ["-c:a", "pcm_s16le"]
+
+    # Only render the silence clip when a gap is actually requested; a zero
+    # or negative "-t" would otherwise be passed to ffmpeg for nothing.
+    use_gap = silence > 0
+    silence_path = work_dir / "silence.wav"
+    if use_gap:
+        run(
+            ffmpeg_base
+            + [
+                "-f",
+                "lavfi",
+                "-t",
+                f"{silence:.3f}",
+                "-i",
+                "anullsrc=channel_layout=stereo:sample_rate=48000",
+            ]
+            + pcm_codec
+            + [str(silence_path)]
+        )
+
+    # Bring every input to the same sample rate, channel count and codec so
+    # the pieces can be concatenated.
+    normalized: list[Path] = []
+    for index, src in enumerate(sources, start=1):
+        dst = work_dir / f"audio_{index:02d}.wav"
+        run(
+            ffmpeg_base
+            + ["-i", str(src), "-vn", "-ar", "48000", "-ac", "2"]
+            + pcm_codec
+            + [str(dst)]
+        )
+        normalized.append(dst)
+
+    def concat_entry(path: Path) -> str:
+        # The path is wrapped in single quotes, so each embedded quote is
+        # written as '\'' (close quote, escaped quote, reopen quote).
+        escaped = path.resolve().as_posix().replace("'", "'\\''")
+        return f"file '{escaped}'\n"
+
+    list_path = work_dir / "audio_concat.txt"
+    with list_path.open("w", encoding="utf-8") as handle:
+        for position, item in enumerate(normalized):
+            # The gap goes between files only, never before the first one.
+            if position and use_gap:
+                handle.write(concat_entry(silence_path))
+            handle.write(concat_entry(item))
+
+    out_audio = work_dir / "combined_voice.wav"
+    run(
+        ffmpeg_base
+        + ["-f", "concat", "-safe", "0", "-i", str(list_path)]
+        + pcm_codec
+        + [str(out_audio)]
+    )
+    return out_audio
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse the command-line options of the voice-over build script.
+
+    Returns:
+        Namespace with ``video``, ``audio``, ``audio_dir``, ``output``,
+        ``work_dir``, ``silence``, ``width``, ``height``, ``fps``, ``crf``,
+        ``preset`` and ``video_speed``.
+    """
+    cli = argparse.ArgumentParser(description="Combine one video with voice-over audio.")
+    add = cli.add_argument
+
+    # Inputs: the video plus either a single audio file or a directory.
+    add("--video", type=Path, required=True, help="Source video path.")
+    add("--audio", type=Path, default=None, help="Single voice-over audio file.")
+    add("--audio-dir", type=Path, default=None, help="Directory of ordered audio files.")
+
+    # Output and scratch locations.
+    add("--output", type=Path, default=Path("05_outputs/final_voiceover.mp4"))
+    add("--work-dir", type=Path, default=Path("04_intermediate/ubuntu_voiceover"))
+
+    # Audio assembly.
+    add("--silence", type=float, default=0.35, help="Gap seconds between audio files.")
+
+    # Video encoding.
+    add("--width", type=int, default=1920)
+    add("--height", type=int, default=1080)
+    add("--fps", type=int, default=30)
+    add("--crf", type=int, default=20)
+    add("--preset", default="medium")
+    add("--video-speed", type=float, default=None, help="Override automatic speed.")
+
+    return cli.parse_args()
+
+
+def main() -> int:
+    """Build the final voice-over video from the command-line options.
+
+    Validates the inputs, obtains a single voice-over track (the file given
+    by ``--audio`` or the concatenation of ``--audio-dir``), retimes the
+    video so that it spans the voice-over, and encodes the result with
+    ffmpeg.
+
+    Returns:
+        0 once the output file has been written and probed.
+
+    Raises:
+        SystemExit: If ffmpeg/ffprobe is missing, or if not exactly one of
+            ``--audio`` and ``--audio-dir`` was given.
+        FileNotFoundError: If the source video or the voice-over audio does
+            not exist.
+        RuntimeError: If either probed duration is not positive.
+        ValueError: If the effective video speed is not greater than 0.
+        subprocess.CalledProcessError: If an ffmpeg/ffprobe invocation fails.
+    """
+    args = parse_args()
+    require_tool("ffmpeg")
+    require_tool("ffprobe")
+
+    if not args.video.exists():
+        raise FileNotFoundError(args.video)
+    if (args.audio is None) == (args.audio_dir is None):
+        raise SystemExit("Use exactly one of --audio or --audio-dir.")
+
+    args.work_dir.mkdir(parents=True, exist_ok=True)
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+
+    if args.audio is not None:
+        audio_path = args.audio
+    else:
+        audio_path = concat_audio_dir(args.audio_dir, args.work_dir, args.silence)
+    if not audio_path or not audio_path.exists():
+        raise FileNotFoundError(audio_path)
+
+    video_duration = media_duration(args.video)
+    audio_duration = media_duration(audio_path)
+    if video_duration <= 0 or audio_duration <= 0:
+        raise RuntimeError("Invalid media duration.")
+
+    # Compare against None rather than truthiness: an explicit
+    # "--video-speed 0" must reach the validation below instead of being
+    # silently replaced by the automatic speed.
+    if args.video_speed is not None:
+        speed = args.video_speed
+    else:
+        # Automatic speed: the retimed video lasts as long as the audio.
+        speed = video_duration / audio_duration
+    if speed <= 0:
+        raise ValueError("--video-speed must be greater than 0.")
+
+    print(f"video_duration={video_duration:.3f}s")
+    print(f"audio_duration={audio_duration:.3f}s")
+    print(f"video_speed={speed:.6f}x")
+
+    # Video chain: retime, fix the frame rate, fit inside the target frame
+    # keeping the aspect ratio, then pad with black to the exact size.
+    # Audio chain: resample and pad so the audio stream does not end before
+    # the "-t" limit below.
+    vf = (
+        f"[0:v]setpts=PTS/{speed:.8f},fps={args.fps},"
+        f"scale={args.width}:{args.height}:force_original_aspect_ratio=decrease,"
+        f"pad={args.width}:{args.height}:(ow-iw)/2:(oh-ih)/2:black,"
+        "setsar=1,format=yuv420p[v];"
+        "[1:a]aresample=48000,apad[a]"
+    )
+    run(
+        [
+            "ffmpeg",
+            "-hide_banner",
+            "-y",
+            "-i",
+            str(args.video),
+            "-i",
+            str(audio_path),
+            "-filter_complex",
+            vf,
+            "-map",
+            "[v]",
+            "-map",
+            "[a]",
+            # Limit the output to the voice-over length.
+            "-t",
+            f"{audio_duration:.3f}",
+            "-c:v",
+            "libx264",
+            "-preset",
+            args.preset,
+            "-crf",
+            str(args.crf),
+            "-c:a",
+            "aac",
+            "-b:a",
+            "192k",
+            "-ar",
+            "48000",
+            "-ac",
+            "2",
+            "-movflags",
+            "+faststart",
+            str(args.output),
+        ]
+    )
+    final_duration = media_duration(args.output)
+    print(f"output={args.output}")
+    print(f"final_duration={final_duration:.3f}s")
+    return 0
+
+
+# Run the CLI only when executed as a script; main()'s return value is used
+# as the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())