# sam1_utils.py
  1. # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
  2. # All rights reserved.
  3. # pyre-unsafe
  4. # This source code is licensed under the license found in the
  5. # LICENSE file in the root directory of this source tree.
  6. import warnings
  7. import torch
  8. import torch.nn as nn
  9. import torch.nn.functional as F
  10. from torchvision.transforms import Normalize, Resize, ToTensor
  11. # Adapted from https://github.com/facebookresearch/sam2/blob/main/sam2/utils/transforms.py
  12. class SAM2Transforms(nn.Module):
  13. def __init__(
  14. self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0
  15. ):
  16. """
  17. Transforms for SAM2.
  18. """
  19. super().__init__()
  20. self.resolution = resolution
  21. self.mask_threshold = mask_threshold
  22. self.max_hole_area = max_hole_area
  23. self.max_sprinkle_area = max_sprinkle_area
  24. self.mean = [0.5, 0.5, 0.5]
  25. self.std = [0.5, 0.5, 0.5]
  26. self.to_tensor = ToTensor()
  27. self.transforms = torch.jit.script(
  28. nn.Sequential(
  29. Resize((self.resolution, self.resolution)),
  30. Normalize(self.mean, self.std),
  31. )
  32. )
  33. def __call__(self, x):
  34. x = self.to_tensor(x)
  35. return self.transforms(x)
  36. def forward_batch(self, img_list):
  37. img_batch = [self.transforms(self.to_tensor(img)) for img in img_list]
  38. img_batch = torch.stack(img_batch, dim=0)
  39. return img_batch
  40. def transform_coords(
  41. self, coords: torch.Tensor, normalize=False, orig_hw=None
  42. ) -> torch.Tensor:
  43. """
  44. Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates,
  45. If the coords are in absolute image coordinates, normalize should be set to True and original image size is required.
  46. Returns
  47. Un-normalized coordinates in the range of [0, 1] which is expected by the SAM2 model.
  48. """
  49. if normalize:
  50. assert orig_hw is not None
  51. h, w = orig_hw
  52. coords = coords.clone()
  53. coords[..., 0] = coords[..., 0] / w
  54. coords[..., 1] = coords[..., 1] / h
  55. coords = coords * self.resolution # unnormalize coords
  56. return coords
  57. def transform_boxes(
  58. self, boxes: torch.Tensor, normalize=False, orig_hw=None
  59. ) -> torch.Tensor:
  60. """
  61. Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates,
  62. if the coords are in absolute image coordinates, normalize should be set to True and original image size is required.
  63. """
  64. boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw)
  65. return boxes
  66. def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor:
  67. """
  68. Perform PostProcessing on output masks.
  69. """
  70. masks = masks.float()
  71. input_masks = masks
  72. mask_flat = masks.flatten(0, 1).unsqueeze(1) # flatten as 1-channel image
  73. try:
  74. from sam3.perflib.connected_components import connected_components
  75. if self.max_hole_area > 0:
  76. # Holes are those connected components in background with area <= self.fill_hole_area
  77. # (background regions are those with mask scores <= self.mask_threshold)
  78. labels, areas = connected_components(
  79. (mask_flat <= self.mask_threshold).to(torch.uint8)
  80. )
  81. is_hole = (labels > 0) & (areas <= self.max_hole_area)
  82. is_hole = is_hole.reshape_as(masks)
  83. # We fill holes with a small positive mask score (10.0) to change them to foreground.
  84. masks = torch.where(is_hole, self.mask_threshold + 10.0, masks)
  85. if self.max_sprinkle_area > 0:
  86. labels, areas = connected_components(
  87. (mask_flat > self.mask_threshold).to(torch.uint8)
  88. )
  89. is_hole = (labels > 0) & (areas <= self.max_sprinkle_area)
  90. is_hole = is_hole.reshape_as(masks)
  91. # We fill holes with negative mask score (-10.0) to change them to background.
  92. masks = torch.where(is_hole, self.mask_threshold - 10.0, masks)
  93. except Exception as e:
  94. # Skip the post-processing step if the CUDA kernel fails
  95. warnings.warn(
  96. f"{e}\n\nSkipping the post-processing step due to the error above. You can "
  97. "still use SAM 3 and it's OK to ignore the error above, although some post-processing "
  98. "functionality may be limited (which doesn't affect the results in most cases; see "
  99. "https://github.com/facebookresearch/sam3/blob/main/INSTALL.md).",
  100. category=UserWarning,
  101. stacklevel=2,
  102. )
  103. masks = input_masks
  104. masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False)
  105. return masks