transcoder.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. # Copyright (c) Meta Platforms, Inc. and affiliates.
  2. # All rights reserved.
  3. # This source code is licensed under the license found in the
  4. # LICENSE file in the root directory of this source tree.
  5. import ast
  6. import math
  7. import os
  8. import shutil
  9. import subprocess
  10. from dataclasses import dataclass
  11. from typing import Optional
  12. import av
  13. from app_conf import FFMPEG_NUM_THREADS
  14. from dataclasses_json import dataclass_json
  15. TRANSCODE_VERSION = 1
  16. @dataclass_json
  17. @dataclass
  18. class VideoMetadata:
  19. duration_sec: Optional[float]
  20. video_duration_sec: Optional[float]
  21. container_duration_sec: Optional[float]
  22. fps: Optional[float]
  23. width: Optional[int]
  24. height: Optional[int]
  25. num_video_frames: int
  26. num_video_streams: int
  27. video_start_time: float
  28. def transcode(
  29. in_path: str,
  30. out_path: str,
  31. in_metadata: Optional[VideoMetadata],
  32. seek_t: float,
  33. duration_time_sec: float,
  34. ):
  35. codec = os.environ.get("VIDEO_ENCODE_CODEC", "libx264")
  36. crf = int(os.environ.get("VIDEO_ENCODE_CRF", "23"))
  37. fps = int(os.environ.get("VIDEO_ENCODE_FPS", "24"))
  38. max_w = int(os.environ.get("VIDEO_ENCODE_MAX_WIDTH", "1280"))
  39. max_h = int(os.environ.get("VIDEO_ENCODE_MAX_HEIGHT", "720"))
  40. verbose = ast.literal_eval(os.environ.get("VIDEO_ENCODE_VERBOSE", "False"))
  41. normalize_video(
  42. in_path=in_path,
  43. out_path=out_path,
  44. max_w=max_w,
  45. max_h=max_h,
  46. seek_t=seek_t,
  47. max_time=duration_time_sec,
  48. in_metadata=in_metadata,
  49. codec=codec,
  50. crf=crf,
  51. fps=fps,
  52. verbose=verbose,
  53. )
  54. def get_video_metadata(path: str) -> VideoMetadata:
  55. with av.open(path) as cont:
  56. num_video_streams = len(cont.streams.video)
  57. width, height, fps = None, None, None
  58. video_duration_sec = 0
  59. container_duration_sec = float((cont.duration or 0) / av.time_base)
  60. video_start_time = 0.0
  61. rotation_deg = 0
  62. num_video_frames = 0
  63. if num_video_streams > 0:
  64. video_stream = cont.streams.video[0]
  65. assert video_stream.time_base is not None
  66. # for rotation, see: https://github.com/PyAV-Org/PyAV/pull/1249
  67. rotation_deg = video_stream.side_data.get("DISPLAYMATRIX", 0)
  68. num_video_frames = video_stream.frames
  69. video_start_time = float(video_stream.start_time * video_stream.time_base)
  70. width, height = video_stream.width, video_stream.height
  71. fps = float(video_stream.guessed_rate)
  72. fps_avg = video_stream.average_rate
  73. if video_stream.duration is not None:
  74. video_duration_sec = float(
  75. video_stream.duration * video_stream.time_base
  76. )
  77. if fps is None:
  78. fps = float(fps_avg)
  79. if not math.isnan(rotation_deg) and int(rotation_deg) in (
  80. 90,
  81. -90,
  82. 270,
  83. -270,
  84. ):
  85. width, height = height, width
  86. duration_sec = max(container_duration_sec, video_duration_sec)
  87. return VideoMetadata(
  88. duration_sec=duration_sec,
  89. container_duration_sec=container_duration_sec,
  90. video_duration_sec=video_duration_sec,
  91. video_start_time=video_start_time,
  92. fps=fps,
  93. width=width,
  94. height=height,
  95. num_video_streams=num_video_streams,
  96. num_video_frames=num_video_frames,
  97. )
  98. def normalize_video(
  99. in_path: str,
  100. out_path: str,
  101. max_w: int,
  102. max_h: int,
  103. seek_t: float,
  104. max_time: float,
  105. in_metadata: Optional[VideoMetadata],
  106. codec: str = "libx264",
  107. crf: int = 23,
  108. fps: int = 24,
  109. verbose: bool = False,
  110. ):
  111. if in_metadata is None:
  112. in_metadata = get_video_metadata(in_path)
  113. assert in_metadata.num_video_streams > 0, "no video stream present"
  114. w, h = in_metadata.width, in_metadata.height
  115. assert w is not None, "width not available"
  116. assert h is not None, "height not available"
  117. # rescale to max_w:max_h if needed & preserve aspect ratio
  118. r = w / h
  119. if r < 1:
  120. h = min(720, h)
  121. w = h * r
  122. else:
  123. w = min(1280, w)
  124. h = w / r
  125. # h264 cannot encode w/ odd dimensions
  126. w = int(w)
  127. h = int(h)
  128. if w % 2 != 0:
  129. w += 1
  130. if h % 2 != 0:
  131. h += 1
  132. ffmpeg = shutil.which("ffmpeg")
  133. cmd = [
  134. ffmpeg,
  135. "-threads",
  136. f"{FFMPEG_NUM_THREADS}", # global threads
  137. "-ss",
  138. f"{seek_t:.2f}",
  139. "-t",
  140. f"{max_time:.2f}",
  141. "-i",
  142. in_path,
  143. "-threads",
  144. f"{FFMPEG_NUM_THREADS}", # decode (or filter..?) threads
  145. "-vf",
  146. f"fps={fps},scale={w}:{h},setsar=1:1",
  147. "-c:v",
  148. codec,
  149. "-crf",
  150. f"{crf}",
  151. "-pix_fmt",
  152. "yuv420p",
  153. "-threads",
  154. f"{FFMPEG_NUM_THREADS}", # encode threads
  155. out_path,
  156. "-y",
  157. ]
  158. if verbose:
  159. print(" ".join(cmd))
  160. subprocess.call(
  161. cmd,
  162. stdout=None if verbose else subprocess.DEVNULL,
  163. stderr=None if verbose else subprocess.DEVNULL,
  164. )