amg.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. # Copyright (c) Meta Platforms, Inc. and affiliates.
  2. # All rights reserved.
  3. # This source code is licensed under the license found in the
  4. # LICENSE file in the root directory of this source tree.
  5. import math
  6. from copy import deepcopy
  7. from itertools import product
  8. from typing import Any, Dict, Generator, ItemsView, List, Tuple
  9. import numpy as np
  10. import torch
  11. # Very lightly adapted from https://github.com/facebookresearch/segment-anything/blob/main/segment_anything/utils/amg.py
  12. class MaskData:
  13. """
  14. A structure for storing masks and their related data in batched format.
  15. Implements basic filtering and concatenation.
  16. """
  17. def __init__(self, **kwargs) -> None:
  18. for v in kwargs.values():
  19. assert isinstance(
  20. v, (list, np.ndarray, torch.Tensor)
  21. ), "MaskData only supports list, numpy arrays, and torch tensors."
  22. self._stats = dict(**kwargs)
  23. def __setitem__(self, key: str, item: Any) -> None:
  24. assert isinstance(
  25. item, (list, np.ndarray, torch.Tensor)
  26. ), "MaskData only supports list, numpy arrays, and torch tensors."
  27. self._stats[key] = item
  28. def __delitem__(self, key: str) -> None:
  29. del self._stats[key]
  30. def __getitem__(self, key: str) -> Any:
  31. return self._stats[key]
  32. def items(self) -> ItemsView[str, Any]:
  33. return self._stats.items()
  34. def filter(self, keep: torch.Tensor) -> None:
  35. for k, v in self._stats.items():
  36. if v is None:
  37. self._stats[k] = None
  38. elif isinstance(v, torch.Tensor):
  39. self._stats[k] = v[torch.as_tensor(keep, device=v.device)]
  40. elif isinstance(v, np.ndarray):
  41. self._stats[k] = v[keep.detach().cpu().numpy()]
  42. elif isinstance(v, list) and keep.dtype == torch.bool:
  43. self._stats[k] = [a for i, a in enumerate(v) if keep[i]]
  44. elif isinstance(v, list):
  45. self._stats[k] = [v[i] for i in keep]
  46. else:
  47. raise TypeError(f"MaskData key {k} has an unsupported type {type(v)}.")
  48. def cat(self, new_stats: "MaskData") -> None:
  49. for k, v in new_stats.items():
  50. if k not in self._stats or self._stats[k] is None:
  51. self._stats[k] = deepcopy(v)
  52. elif isinstance(v, torch.Tensor):
  53. self._stats[k] = torch.cat([self._stats[k], v], dim=0)
  54. elif isinstance(v, np.ndarray):
  55. self._stats[k] = np.concatenate([self._stats[k], v], axis=0)
  56. elif isinstance(v, list):
  57. self._stats[k] = self._stats[k] + deepcopy(v)
  58. else:
  59. raise TypeError(f"MaskData key {k} has an unsupported type {type(v)}.")
  60. def to_numpy(self) -> None:
  61. for k, v in self._stats.items():
  62. if isinstance(v, torch.Tensor):
  63. self._stats[k] = v.float().detach().cpu().numpy()
  64. def is_box_near_crop_edge(
  65. boxes: torch.Tensor, crop_box: List[int], orig_box: List[int], atol: float = 20.0
  66. ) -> torch.Tensor:
  67. """Filter masks at the edge of a crop, but not at the edge of the original image."""
  68. crop_box_torch = torch.as_tensor(crop_box, dtype=torch.float, device=boxes.device)
  69. orig_box_torch = torch.as_tensor(orig_box, dtype=torch.float, device=boxes.device)
  70. boxes = uncrop_boxes_xyxy(boxes, crop_box).float()
  71. near_crop_edge = torch.isclose(boxes, crop_box_torch[None, :], atol=atol, rtol=0)
  72. near_image_edge = torch.isclose(boxes, orig_box_torch[None, :], atol=atol, rtol=0)
  73. near_crop_edge = torch.logical_and(near_crop_edge, ~near_image_edge)
  74. return torch.any(near_crop_edge, dim=1)
  75. def box_xyxy_to_xywh(box_xyxy: torch.Tensor) -> torch.Tensor:
  76. box_xywh = deepcopy(box_xyxy)
  77. box_xywh[2] = box_xywh[2] - box_xywh[0]
  78. box_xywh[3] = box_xywh[3] - box_xywh[1]
  79. return box_xywh
  80. def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
  81. assert len(args) > 0 and all(
  82. len(a) == len(args[0]) for a in args
  83. ), "Batched iteration must have inputs of all the same size."
  84. n_batches = len(args[0]) // batch_size + int(len(args[0]) % batch_size != 0)
  85. for b in range(n_batches):
  86. yield [arg[b * batch_size : (b + 1) * batch_size] for arg in args]
  87. def mask_to_rle_pytorch(tensor: torch.Tensor) -> List[Dict[str, Any]]:
  88. """
  89. Encodes masks to an uncompressed RLE, in the format expected by
  90. pycoco tools.
  91. """
  92. # Put in fortran order and flatten h,w
  93. b, h, w = tensor.shape
  94. tensor = tensor.permute(0, 2, 1).flatten(1)
  95. # Compute change indices
  96. diff = tensor[:, 1:] ^ tensor[:, :-1]
  97. change_indices = diff.nonzero()
  98. # Encode run length
  99. out = []
  100. for i in range(b):
  101. cur_idxs = change_indices[change_indices[:, 0] == i, 1]
  102. cur_idxs = torch.cat(
  103. [
  104. torch.tensor([0], dtype=cur_idxs.dtype, device=cur_idxs.device),
  105. cur_idxs + 1,
  106. torch.tensor([h * w], dtype=cur_idxs.dtype, device=cur_idxs.device),
  107. ]
  108. )
  109. btw_idxs = cur_idxs[1:] - cur_idxs[:-1]
  110. counts = [] if tensor[i, 0] == 0 else [0]
  111. counts.extend(btw_idxs.detach().cpu().tolist())
  112. out.append({"size": [h, w], "counts": counts})
  113. return out
  114. def rle_to_mask(rle: Dict[str, Any]) -> np.ndarray:
  115. """Compute a binary mask from an uncompressed RLE."""
  116. h, w = rle["size"]
  117. mask = np.empty(h * w, dtype=bool)
  118. idx = 0
  119. parity = False
  120. for count in rle["counts"]:
  121. mask[idx : idx + count] = parity
  122. idx += count
  123. parity ^= True
  124. mask = mask.reshape(w, h)
  125. return mask.transpose() # Put in C order
  126. def area_from_rle(rle: Dict[str, Any]) -> int:
  127. return sum(rle["counts"][1::2])
  128. def calculate_stability_score(
  129. masks: torch.Tensor, mask_threshold: float, threshold_offset: float
  130. ) -> torch.Tensor:
  131. """
  132. Computes the stability score for a batch of masks. The stability
  133. score is the IoU between the binary masks obtained by thresholding
  134. the predicted mask logits at high and low values.
  135. """
  136. # One mask is always contained inside the other.
  137. # Save memory by preventing unnecessary cast to torch.int64
  138. intersections = (
  139. (masks > (mask_threshold + threshold_offset))
  140. .sum(-1, dtype=torch.int16)
  141. .sum(-1, dtype=torch.int32)
  142. )
  143. unions = (
  144. (masks > (mask_threshold - threshold_offset))
  145. .sum(-1, dtype=torch.int16)
  146. .sum(-1, dtype=torch.int32)
  147. )
  148. return intersections / unions
  149. def build_point_grid(n_per_side: int) -> np.ndarray:
  150. """Generates a 2D grid of points evenly spaced in [0,1]x[0,1]."""
  151. offset = 1 / (2 * n_per_side)
  152. points_one_side = np.linspace(offset, 1 - offset, n_per_side)
  153. points_x = np.tile(points_one_side[None, :], (n_per_side, 1))
  154. points_y = np.tile(points_one_side[:, None], (1, n_per_side))
  155. points = np.stack([points_x, points_y], axis=-1).reshape(-1, 2)
  156. return points
  157. def build_all_layer_point_grids(
  158. n_per_side: int, n_layers: int, scale_per_layer: int
  159. ) -> List[np.ndarray]:
  160. """Generates point grids for all crop layers."""
  161. points_by_layer = []
  162. for i in range(n_layers + 1):
  163. n_points = int(n_per_side / (scale_per_layer**i))
  164. points_by_layer.append(build_point_grid(n_points))
  165. return points_by_layer
  166. def generate_crop_boxes(
  167. im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
  168. ) -> Tuple[List[List[int]], List[int]]:
  169. """
  170. Generates a list of crop boxes of different sizes. Each layer
  171. has (2**i)**2 boxes for the ith layer.
  172. """
  173. crop_boxes, layer_idxs = [], []
  174. im_h, im_w = im_size
  175. short_side = min(im_h, im_w)
  176. # Original image
  177. crop_boxes.append([0, 0, im_w, im_h])
  178. layer_idxs.append(0)
  179. def crop_len(orig_len, n_crops, overlap):
  180. return int(math.ceil((overlap * (n_crops - 1) + orig_len) / n_crops))
  181. for i_layer in range(n_layers):
  182. n_crops_per_side = 2 ** (i_layer + 1)
  183. overlap = int(overlap_ratio * short_side * (2 / n_crops_per_side))
  184. crop_w = crop_len(im_w, n_crops_per_side, overlap)
  185. crop_h = crop_len(im_h, n_crops_per_side, overlap)
  186. crop_box_x0 = [int((crop_w - overlap) * i) for i in range(n_crops_per_side)]
  187. crop_box_y0 = [int((crop_h - overlap) * i) for i in range(n_crops_per_side)]
  188. # Crops in XYWH format
  189. for x0, y0 in product(crop_box_x0, crop_box_y0):
  190. box = [x0, y0, min(x0 + crop_w, im_w), min(y0 + crop_h, im_h)]
  191. crop_boxes.append(box)
  192. layer_idxs.append(i_layer + 1)
  193. return crop_boxes, layer_idxs
  194. def uncrop_boxes_xyxy(boxes: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  195. x0, y0, _, _ = crop_box
  196. offset = torch.tensor([[x0, y0, x0, y0]], device=boxes.device)
  197. # Check if boxes has a channel dimension
  198. if len(boxes.shape) == 3:
  199. offset = offset.unsqueeze(1)
  200. return boxes + offset
  201. def uncrop_points(points: torch.Tensor, crop_box: List[int]) -> torch.Tensor:
  202. x0, y0, _, _ = crop_box
  203. offset = torch.tensor([[x0, y0]], device=points.device)
  204. # Check if points has a channel dimension
  205. if len(points.shape) == 3:
  206. offset = offset.unsqueeze(1)
  207. return points + offset
  208. def uncrop_masks(
  209. masks: torch.Tensor, crop_box: List[int], orig_h: int, orig_w: int
  210. ) -> torch.Tensor:
  211. x0, y0, x1, y1 = crop_box
  212. if x0 == 0 and y0 == 0 and x1 == orig_w and y1 == orig_h:
  213. return masks
  214. # Coordinate transform masks
  215. pad_x, pad_y = orig_w - (x1 - x0), orig_h - (y1 - y0)
  216. pad = (x0, pad_x - x0, y0, pad_y - y0)
  217. return torch.nn.functional.pad(masks, pad, value=0)
  218. def remove_small_regions(
  219. mask: np.ndarray, area_thresh: float, mode: str
  220. ) -> Tuple[np.ndarray, bool]:
  221. """
  222. Removes small disconnected regions and holes in a mask. Returns the
  223. mask and an indicator of if the mask has been modified.
  224. """
  225. import cv2 # type: ignore
  226. assert mode in ["holes", "islands"]
  227. correct_holes = mode == "holes"
  228. working_mask = (correct_holes ^ mask).astype(np.uint8)
  229. n_labels, regions, stats, _ = cv2.connectedComponentsWithStats(working_mask, 8)
  230. sizes = stats[:, -1][1:] # Row 0 is background label
  231. small_regions = [i + 1 for i, s in enumerate(sizes) if s < area_thresh]
  232. if len(small_regions) == 0:
  233. return mask, False
  234. fill_labels = [0] + small_regions
  235. if not correct_holes:
  236. fill_labels = [i for i in range(n_labels) if i not in fill_labels]
  237. # If every region is below threshold, keep largest
  238. if len(fill_labels) == 0:
  239. fill_labels = [int(np.argmax(sizes)) + 1]
  240. mask = np.isin(regions, fill_labels)
  241. return mask, True
def coco_encode_rle(uncompressed_rle: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert an uncompressed RLE dict using pycocotools.

    The dict is passed to ``mask_utils.frPyObjects`` together with the
    height and width read from its "size" entry. The "counts" entry of
    the result is then decoded from bytes to a UTF-8 string.
    NOTE(review): presumably the result is pycocotools' compressed RLE;
    confirm against the pycocotools documentation.
    """
    # Imported lazily so pycocotools is only needed when this is called.
    from pycocotools import mask as mask_utils  # type: ignore

    h, w = uncompressed_rle["size"]
    rle = mask_utils.frPyObjects(uncompressed_rle, h, w)
    rle["counts"] = rle["counts"].decode("utf-8")  # Necessary to serialize with json
    return rle
  248. def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
  249. """
  250. Calculates boxes in XYXY format around masks. Return [0,0,0,0] for
  251. an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
  252. """
  253. # torch.max below raises an error on empty inputs, just skip in this case
  254. if torch.numel(masks) == 0:
  255. return torch.zeros(*masks.shape[:-2], 4, device=masks.device)
  256. # Normalize shape to CxHxW
  257. shape = masks.shape
  258. h, w = shape[-2:]
  259. if len(shape) > 2:
  260. masks = masks.flatten(0, -3)
  261. else:
  262. masks = masks.unsqueeze(0)
  263. # Get top and bottom edges
  264. in_height, _ = torch.max(masks, dim=-1)
  265. in_height_coords = in_height * torch.arange(h, device=in_height.device)[None, :]
  266. bottom_edges, _ = torch.max(in_height_coords, dim=-1)
  267. in_height_coords = in_height_coords + h * (~in_height)
  268. top_edges, _ = torch.min(in_height_coords, dim=-1)
  269. # Get left and right edges
  270. in_width, _ = torch.max(masks, dim=-2)
  271. in_width_coords = in_width * torch.arange(w, device=in_width.device)[None, :]
  272. right_edges, _ = torch.max(in_width_coords, dim=-1)
  273. in_width_coords = in_width_coords + w * (~in_width)
  274. left_edges, _ = torch.min(in_width_coords, dim=-1)
  275. # If the mask is empty the right edge will be to the left of the left edge.
  276. # Replace these boxes with [0, 0, 0, 0]
  277. empty_filter = (right_edges < left_edges) | (bottom_edges < top_edges)
  278. out = torch.stack([left_edges, top_edges, right_edges, bottom_edges], dim=-1)
  279. out = out * (~empty_filter).unsqueeze(-1)
  280. # Return to original shape
  281. if len(shape) > 2:
  282. out = out.reshape(*shape[:-2], 4)
  283. else:
  284. out = out[0]
  285. return out