| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899 |
- # Copyright (c) Meta Platforms, Inc. and affiliates.
- # All rights reserved.
- # This source code is licensed under the license found in the
- # LICENSE file in the root directory of this source tree.
- import torch
- import torch.nn as nn
- import torch.nn.functional as F
- from torchvision.transforms import Normalize, Resize, ToTensor
class SAM2Transforms(nn.Module):
    """
    Image pre-processing, prompt-coordinate scaling and mask post-processing for SAM2.
    """

    def __init__(
        self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0
    ):
        """
        Transforms for SAM2.

        Args:
            resolution: side length of the square image produced by the resize
                step; also the factor applied by `transform_coords`.
            mask_threshold: mask score separating background (<= threshold) from
                foreground (> threshold) in `postprocess_masks`.
            max_hole_area: background components with area up to this value are
                turned into foreground; values <= 0 disable hole filling.
            max_sprinkle_area: foreground components with area up to this value
                are turned into background; values <= 0 disable sprinkle removal.
        """
        super().__init__()
        self.resolution = resolution
        self.mask_threshold = mask_threshold
        self.max_hole_area = max_hole_area
        self.max_sprinkle_area = max_sprinkle_area
        # Per-channel statistics handed to Normalize below.
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.to_tensor = ToTensor()
        # Resize + Normalize are scripted together; ToTensor stays outside the
        # scripted pipeline and is applied separately in __call__/forward_batch.
        self.transforms = torch.jit.script(
            nn.Sequential(
                Resize((self.resolution, self.resolution)),
                Normalize(self.mean, self.std),
            )
        )

    def __call__(self, x):
        """
        Convert a single image with ToTensor, then resize and normalize it.

        NOTE: this overrides nn.Module.__call__ directly and the class defines no
        forward(), so the usual nn.Module call machinery is not involved here.
        """
        x = self.to_tensor(x)
        return self.transforms(x)

    def forward_batch(self, img_list):
        """
        Transform every image in `img_list` and stack the results along a new
        leading (batch) dimension.
        """
        img_batch = [self.transforms(self.to_tensor(img)) for img in img_list]
        img_batch = torch.stack(img_batch, dim=0)
        return img_batch

    def transform_coords(
        self, coords: torch.Tensor, normalize=False, orig_hw=None
    ) -> torch.Tensor:
        """
        Scale point coordinates to the model input resolution.

        Expects a torch tensor with length 2 in the last dimension, indexed as
        (x, y). The coordinates can be in absolute image or normalized coordinates.
        If the coords are in absolute image coordinates, normalize should be set
        to True and the original image size `orig_hw` = (height, width) is required.

        Returns
            A new tensor with the coordinates multiplied by `self.resolution`,
            i.e. coordinates normalized to [0, 1] end up in [0, resolution].
            The input tensor is never modified.
        """
        if normalize:
            assert orig_hw is not None, "orig_hw is required when normalize=True"
            h, w = orig_hw
            if coords.is_floating_point():
                coords = coords.clone()
            else:
                # Integer pixel coordinates must be promoted first: assigning the
                # quotient back into an integer tensor would truncate it (to 0 for
                # any in-image point). The dtype conversion already copies.
                coords = coords.float()
            coords[..., 0] = coords[..., 0] / w
            coords[..., 1] = coords[..., 1] / h
        coords = coords * self.resolution  # scale up to model-input units
        return coords

    def transform_boxes(
        self, boxes: torch.Tensor, normalize=False, orig_hw=None
    ) -> torch.Tensor:
        """
        Scale box coordinates to the model input resolution.

        Expects a tensor of shape Bx4. The coordinates can be in absolute image or
        normalized coordinates; if they are in absolute image coordinates,
        normalize should be set to True and the original image size is required.

        Returns
            A tensor of shape Bx2x2: each box is viewed as two (x, y) points and
            passed through `transform_coords`.
        """
        boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw)
        return boxes

    def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor:
        """
        Perform PostProcessing on output masks.

        Optionally fills small holes and removes small foreground "sprinkles",
        then bilinearly resizes the mask scores to `orig_hw`.

        Args:
            masks: mask scores; expected to be 4-D, since the first two dims are
                flattened for component analysis and the last two are resized.
            orig_hw: target (height, width) passed to F.interpolate as the size.

        Returns
            Float mask scores at the requested size.
        """
        masks = masks.float()
        fill_holes = self.max_hole_area > 0
        remove_sprinkles = self.max_sprinkle_area > 0

        if fill_holes or remove_sprinkles:
            # Only needed when a connected-component pass is actually requested.
            from sam2.utils.misc import get_connected_components

            # Computed once, before either pass, so that sprinkle removal also
            # works when hole filling is disabled. Both passes look at the
            # unmodified input scores.
            mask_flat = masks.flatten(0, 1).unsqueeze(1)  # flatten as 1-channel image

            if fill_holes:
                # Holes are those connected components in background with
                # area <= self.max_hole_area (background regions are those with
                # mask scores <= self.mask_threshold).
                # NOTE(review): labels/areas are presumably per-pixel maps with
                # label 0 outside any component; confirm in sam2.utils.misc.
                labels, areas = get_connected_components(
                    mask_flat <= self.mask_threshold
                )
                is_hole = (labels > 0) & (areas <= self.max_hole_area)
                is_hole = is_hole.reshape_as(masks)
                # We fill holes with a small positive mask score (10.0) to change them to foreground.
                masks = torch.where(is_hole, self.mask_threshold + 10.0, masks)

            if remove_sprinkles:
                # Sprinkles are small connected components in foreground
                # (mask scores > self.mask_threshold).
                labels, areas = get_connected_components(
                    mask_flat > self.mask_threshold
                )
                is_sprinkle = (labels > 0) & (areas <= self.max_sprinkle_area)
                is_sprinkle = is_sprinkle.reshape_as(masks)
                # We overwrite sprinkles with a negative mask score (-10.0) to change them to background.
                masks = torch.where(is_sprinkle, self.mask_threshold - 10.0, masks)

        masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False)
        return masks
|