- # Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
- # pyre-unsafe
- import copy
- import itertools
- from typing import Any, Iterator, List, Union
- import numpy as np
- import pycocotools.mask as mask_util
- import torch
- from torch import device
- from .boxes import Boxes
- from .memory import retry_if_cuda_oom
- from .roi_align import ROIAlign
- def polygon_area(x, y):
- # Using the shoelace formula
- # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
- return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
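- # Illustrative check (not part of the original module): for the unit square with
- # corners (0, 0), (1, 0), (1, 1), (0, 1), the shoelace formula above gives area 1.
- #   >>> float(polygon_area(np.array([0.0, 1.0, 1.0, 0.0]), np.array([0.0, 0.0, 1.0, 1.0])))
- #   1.0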
- def polygons_to_bitmask(
- polygons: List[np.ndarray], height: int, width: int
- ) -> np.ndarray:
- """
- Args:
- polygons (list[ndarray]): each array is a flat polygon of shape (2k,), i.e. [x0, y0, ..., xk-1, yk-1]
- height, width (int)
- Returns:
- ndarray: a bool mask of shape (height, width)
- """
- if len(polygons) == 0:
- # COCOAPI does not support empty polygons
- return np.zeros((height, width)).astype(bool)
- rles = mask_util.frPyObjects(polygons, height, width)
- rle = mask_util.merge(rles)
- return mask_util.decode(rle).astype(bool)
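- # Illustrative usage (values made up): rasterize one triangle, given as a flat
- # [x0, y0, x1, y1, x2, y2] array, into an 8x8 boolean mask.
- #   >>> tri = [np.array([0.0, 0.0, 4.0, 0.0, 0.0, 4.0])]
- #   >>> polygons_to_bitmask(tri, height=8, width=8).shape
- #   (8, 8)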
- def rasterize_polygons_within_box(
- polygons: List[np.ndarray], box: np.ndarray, mask_size: int
- ) -> torch.Tensor:
- """
- Rasterize the polygons into a mask image and
- crop the mask content in the given box.
- The cropped mask is resized to (mask_size, mask_size).
- This function is used when generating training targets for mask head in Mask R-CNN.
- Given original ground-truth masks for an image, new ground-truth mask
- training targets in the size of `mask_size x mask_size`
- must be provided for each predicted box. This function will be called to
- produce such targets.
- Args:
- polygons (list[ndarray[float]]): a list of polygons that together represent one instance.
- box: 4-element numpy array
- mask_size (int):
- Returns:
- Tensor: BoolTensor of shape (mask_size, mask_size)
- """
- # 1. Shift the polygons w.r.t the boxes
- w, h = box[2] - box[0], box[3] - box[1]
- polygons = copy.deepcopy(polygons)
- for p in polygons:
- p[0::2] = p[0::2] - box[0]
- p[1::2] = p[1::2] - box[1]
- # 2. Rescale the polygons to the new box size
- # max() to avoid division by small number
- ratio_h = mask_size / max(h, 0.1)
- ratio_w = mask_size / max(w, 0.1)
- if ratio_h == ratio_w:
- for p in polygons:
- p *= ratio_h
- else:
- for p in polygons:
- p[0::2] *= ratio_w
- p[1::2] *= ratio_h
- # 3. Rasterize the polygons with coco api
- mask = polygons_to_bitmask(polygons, mask_size, mask_size)
- mask = torch.from_numpy(mask)
- return mask
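- # Illustrative usage (values made up): crop a triangle with the box [0, 0, 4, 4]
- # and rasterize it into a 28x28 boolean training target, as the Mask R-CNN mask
- # head expects.
- #   >>> tri = [np.array([0.0, 0.0, 4.0, 0.0, 0.0, 4.0])]
- #   >>> target = rasterize_polygons_within_box(tri, np.array([0.0, 0.0, 4.0, 4.0]), 28)
- #   >>> target.shape, target.dtype
- #   (torch.Size([28, 28]), torch.bool)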
- class BitMasks:
- """
- This class stores the segmentation masks for all objects in one image, in
- the form of bitmaps.
- Attributes:
- tensor: bool Tensor of N,H,W, representing N instances in the image.
- """
- def __init__(self, tensor: Union[torch.Tensor, np.ndarray]):
- """
- Args:
- tensor: bool Tensor of N,H,W, representing N instances in the image.
- """
- if isinstance(tensor, torch.Tensor):
- tensor = tensor.to(torch.bool)
- else:
- tensor = torch.as_tensor(
- tensor, dtype=torch.bool, device=torch.device("cpu")
- )
- assert tensor.dim() == 3, tensor.size()
- self.image_size = tensor.shape[1:]
- self.tensor = tensor
- @torch.jit.unused
- def to(self, *args: Any, **kwargs: Any) -> "BitMasks":
- return BitMasks(self.tensor.to(*args, **kwargs))
- @property
- def device(self) -> torch.device:
- return self.tensor.device
- @torch.jit.unused
- def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks":
- """
- Returns:
- BitMasks: Create a new :class:`BitMasks` by indexing.
- The following usages are allowed:
- 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask.
- 2. `new_masks = masks[2:10]`: return a slice of masks.
- 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
- with `length = len(masks)`. Nonzero elements in the vector will be selected.
- Note that the returned object might share storage with this object,
- subject to PyTorch's indexing semantics.
- """
- if isinstance(item, int):
- return BitMasks(self.tensor[item].unsqueeze(0))
- m = self.tensor[item]
- assert m.dim() == 3, (
- "Indexing on BitMasks with {} returns a tensor with shape {}!".format(
- item, m.shape
- )
- )
- return BitMasks(m)
- @torch.jit.unused
- def __iter__(self) -> torch.Tensor:
- yield from self.tensor
- @torch.jit.unused
- def __repr__(self) -> str:
- s = self.__class__.__name__ + "("
- s += "num_instances={})".format(len(self.tensor))
- return s
- def __len__(self) -> int:
- return self.tensor.shape[0]
- def nonempty(self) -> torch.Tensor:
- """
- Find masks that are non-empty.
- Returns:
- Tensor: a BoolTensor which represents
- whether each mask is empty (False) or non-empty (True).
- """
- return self.tensor.flatten(1).any(dim=1)
- @staticmethod
- def from_polygon_masks(
- polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]],
- height: int,
- width: int,
- ) -> "BitMasks":
- """
- Args:
- polygon_masks (list[list[ndarray]] or PolygonMasks)
- height, width (int)
- """
- if isinstance(polygon_masks, PolygonMasks):
- polygon_masks = polygon_masks.polygons
- masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks]
- if len(masks):
- return BitMasks(torch.stack([torch.from_numpy(x) for x in masks]))
- else:
- return BitMasks(torch.empty(0, height, width, dtype=torch.bool))
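- # Illustrative usage (values made up): build a BitMasks holding one 8x8 instance
- # from a per-instance list of flattened polygons.
- #   >>> polys = [[np.array([0.0, 0.0, 4.0, 0.0, 0.0, 4.0])]]
- #   >>> bm = BitMasks.from_polygon_masks(polys, height=8, width=8)
- #   >>> len(bm), bm.tensor.shape
- #   (1, torch.Size([1, 8, 8]))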
- @staticmethod
- def from_roi_masks(roi_masks: "ROIMasks", height: int, width: int) -> "BitMasks":
- """
- Args:
- roi_masks:
- height, width (int):
- """
- return roi_masks.to_bitmasks(height, width)
- def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
- """
- Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
- This can be used to prepare training targets for Mask R-CNN.
- It has less reconstruction error than rasterizing polygons, but we observe
- no difference in accuracy, and BitMasks requires more memory to store all the masks.
- Args:
- boxes (Tensor): Nx4 tensor storing the boxes for each mask
- mask_size (int): the size of the rasterized mask.
- Returns:
- Tensor:
- A bool tensor of shape (N, mask_size, mask_size), where
- N is the number of predicted boxes for this image.
- """
- assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
- device = self.tensor.device
- batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[
- :, None
- ]
- rois = torch.cat([batch_inds, boxes], dim=1) # Nx5
- bit_masks = self.tensor.to(dtype=torch.float32)
- rois = rois.to(device=device)
- output = (
- ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True)
- .forward(bit_masks[:, None, :, :], rois)
- .squeeze(1)
- )
- output = output >= 0.5
- return output
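- # Illustrative usage (all values made up): produce 28x28 mask targets for two
- # instances; in training, `boxes` would be the matched ground-truth boxes.
- #   >>> masks = BitMasks(torch.zeros(2, 32, 32, dtype=torch.bool))
- #   >>> boxes = torch.tensor([[0.0, 0.0, 16.0, 16.0], [8.0, 8.0, 24.0, 24.0]])
- #   >>> masks.crop_and_resize(boxes, mask_size=28).shape
- #   torch.Size([2, 28, 28])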
- def get_bounding_boxes(self) -> Boxes:
- """
- Returns:
- Boxes: tight bounding boxes around bitmasks.
- If a mask is empty, its bounding box will be all zero.
- """
- boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32)
- x_any = torch.any(self.tensor, dim=1)
- y_any = torch.any(self.tensor, dim=2)
- for idx in range(self.tensor.shape[0]):
- x = torch.where(x_any[idx, :])[0]
- y = torch.where(y_any[idx, :])[0]
- if len(x) > 0 and len(y) > 0:
- boxes[idx, :] = torch.as_tensor(
- [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32
- )
- return Boxes(boxes)
- @staticmethod
- def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks":
- """
- Concatenates a list of BitMasks into a single BitMasks
- Arguments:
- bitmasks_list (list[BitMasks])
- Returns:
- BitMasks: the concatenated BitMasks
- """
- assert isinstance(bitmasks_list, (list, tuple))
- assert len(bitmasks_list) > 0
- assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list)
- cat_bitmasks = type(bitmasks_list[0])(
- torch.cat([bm.tensor for bm in bitmasks_list], dim=0)
- )
- return cat_bitmasks
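- # Illustrative usage: concatenating two BitMasks defined over the same image size.
- #   >>> a = BitMasks(torch.zeros(2, 16, 16, dtype=torch.bool))
- #   >>> b = BitMasks(torch.ones(3, 16, 16, dtype=torch.bool))
- #   >>> len(BitMasks.cat([a, b]))
- #   5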
- class PolygonMasks:
- """
- This class stores the segmentation masks for all objects in one image, in the form of polygons.
- Attributes:
- polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon.
- """
- def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]):
- """
- Arguments:
- polygons (list[list[np.ndarray]]): The first
- level of the list corresponds to individual instances,
- the second level to all the polygons that compose the
- instance, and the third level to the polygon coordinates.
- The third level array should have the format of
- [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
- """
- if not isinstance(polygons, list):
- raise ValueError(
- "Cannot create PolygonMasks: Expect a list of list of polygons per image. "
- "Got '{}' instead.".format(type(polygons))
- )
- def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
- # Use float64 for higher precision, because why not?
- # Always put polygons on CPU (self.to is a no-op) since they
- # are supposed to be small tensors.
- # May need to change this assumption if GPU placement becomes useful
- if isinstance(t, torch.Tensor):
- t = t.cpu().numpy()
- return np.asarray(t).astype("float64")
- def process_polygons(
- polygons_per_instance: List[Union[torch.Tensor, np.ndarray]],
- ) -> List[np.ndarray]:
- if not isinstance(polygons_per_instance, list):
- raise ValueError(
- "Cannot create polygons: Expect a list of polygons per instance. "
- "Got '{}' instead.".format(type(polygons_per_instance))
- )
- # transform each polygon to a numpy array
- polygons_per_instance = [_make_array(p) for p in polygons_per_instance]
- for polygon in polygons_per_instance:
- if len(polygon) % 2 != 0 or len(polygon) < 6:
- raise ValueError(
- f"Cannot create a polygon from {len(polygon)} coordinates."
- )
- return polygons_per_instance
- self.polygons: List[List[np.ndarray]] = [
- process_polygons(polygons_per_instance)
- for polygons_per_instance in polygons
- ]
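- # Illustrative construction (coordinates made up): two instances, the first made
- # of one polygon and the second of two polygons.
- #   >>> pm = PolygonMasks([
- #   ...     [np.array([0.0, 0.0, 4.0, 0.0, 0.0, 4.0])],
- #   ...     [np.array([1.0, 1.0, 5.0, 1.0, 5.0, 5.0]),
- #   ...      np.array([6.0, 6.0, 9.0, 6.0, 6.0, 9.0])],
- #   ... ])
- #   >>> len(pm)
- #   2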
- def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks":
- return self
- @property
- def device(self) -> torch.device:
- return torch.device("cpu")
- def get_bounding_boxes(self) -> Boxes:
- """
- Returns:
- Boxes: tight bounding boxes around polygon masks.
- """
- boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32)
- for idx, polygons_per_instance in enumerate(self.polygons):
- minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32)
- maxxy = torch.zeros(2, dtype=torch.float32)
- for polygon in polygons_per_instance:
- coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32)
- minxy = torch.min(minxy, torch.min(coords, dim=0).values)
- maxxy = torch.max(maxxy, torch.max(coords, dim=0).values)
- boxes[idx, :2] = minxy
- boxes[idx, 2:] = maxxy
- return Boxes(boxes)
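- # Illustrative: for the `pm` constructed in the example above, the first instance's
- # polygon spans x in [0, 4] and y in [0, 4], so its tight box is (0, 0, 4, 4).
- #   >>> pm.get_bounding_boxes().tensor[0]
- #   tensor([0., 0., 4., 4.])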
- def nonempty(self) -> torch.Tensor:
- """
- Find masks that are non-empty.
- Returns:
- Tensor:
- a BoolTensor which represents whether each mask is empty (False) or not (True).
- """
- keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons]
- return torch.from_numpy(np.asarray(keep, dtype=bool))
- def __getitem__(
- self, item: Union[int, slice, List[int], torch.BoolTensor]
- ) -> "PolygonMasks":
- """
- Support indexing over the instances and return a `PolygonMasks` object.
- `item` can be:
- 1. An integer. It will return an object with only one instance.
- 2. A slice. It will return an object with the selected instances.
- 3. A list[int]. It will return an object with the selected instances,
- corresponding to the indices in the list.
- 4. A vector mask of type BoolTensor, whose length is num_instances.
- It will return an object with the instances whose mask is nonzero.
- """
- if isinstance(item, int):
- selected_polygons = [self.polygons[item]]
- elif isinstance(item, slice):
- selected_polygons = self.polygons[item]
- elif isinstance(item, list):
- selected_polygons = [self.polygons[i] for i in item]
- elif isinstance(item, torch.Tensor):
- # Polygons is a list, so we have to move the indices back to CPU.
- if item.dtype == torch.bool:
- assert item.dim() == 1, item.shape
- item = item.nonzero().squeeze(1).cpu().numpy().tolist()
- elif item.dtype in [torch.int32, torch.int64]:
- item = item.cpu().numpy().tolist()
- else:
- raise ValueError(
- "Unsupported tensor dtype={} for indexing!".format(item.dtype)
- )
- selected_polygons = [self.polygons[i] for i in item]
- return PolygonMasks(selected_polygons)
- def __iter__(self) -> Iterator[List[np.ndarray]]:
- """
- Yields:
- list[ndarray]: the polygons for one instance.
- Each ndarray is a float64 vector representing a polygon.
- """
- return iter(self.polygons)
- def __repr__(self) -> str:
- s = self.__class__.__name__ + "("
- s += "num_instances={})".format(len(self.polygons))
- return s
- def __len__(self) -> int:
- return len(self.polygons)
- def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
- """
- Crop each mask by the given box, and resize results to (mask_size, mask_size).
- This can be used to prepare training targets for Mask R-CNN.
- Args:
- boxes (Tensor): Nx4 tensor storing the boxes for each mask
- mask_size (int): the size of the rasterized mask.
- Returns:
- Tensor: A bool tensor of shape (N, mask_size, mask_size), where
- N is the number of predicted boxes for this image.
- """
- assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
- device = boxes.device
- # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise
- # (several small tensors for representing a single instance mask)
- boxes = boxes.to(torch.device("cpu"))
- results = [
- rasterize_polygons_within_box(poly, box.numpy(), mask_size)
- for poly, box in zip(self.polygons, boxes)
- ]
- """
- poly: list[list[float]], the polygons for one instance
- box: a tensor of shape (4,)
- """
- if len(results) == 0:
- return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device)
- return torch.stack(results, dim=0).to(device=device)
- def area(self):
- """
- Computes area of the mask.
- Only works with Polygons, using the shoelace formula:
- https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
- Returns:
- Tensor: a vector, area for each instance
- """
- area = []
- for polygons_per_instance in self.polygons:
- area_per_instance = 0
- for p in polygons_per_instance:
- area_per_instance += polygon_area(p[0::2], p[1::2])
- area.append(area_per_instance)
- return torch.tensor(area)
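- # Illustrative check: the first instance of the `pm` example above is the triangle
- # (0, 0), (4, 0), (0, 4), whose shoelace area is 0.5 * 4 * 4 = 8.
- #   >>> pm.area()[0].item()
- #   8.0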
- @staticmethod
- def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks":
- """
- Concatenates a list of PolygonMasks into a single PolygonMasks
- Arguments:
- polymasks_list (list[PolygonMasks])
- Returns:
- PolygonMasks: the concatenated PolygonMasks
- """
- assert isinstance(polymasks_list, (list, tuple))
- assert len(polymasks_list) > 0
- assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list)
- cat_polymasks = type(polymasks_list[0])(
- list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list))
- )
- return cat_polymasks
- class ROIMasks:
- """
- Represent masks by N smaller masks defined in some ROIs. Once ROI boxes are given,
- a full-image bitmask can be obtained by "pasting" each mask onto the region defined
- by the corresponding ROI box.
- """
- def __init__(self, tensor: torch.Tensor):
- """
- Args:
- tensor: (N, M, M) mask tensor that defines the mask within each ROI.
- """
- if tensor.dim() != 3:
- raise ValueError("ROIMasks must take a masks of 3 dimension.")
- self.tensor = tensor
- def to(self, device: torch.device) -> "ROIMasks":
- return ROIMasks(self.tensor.to(device))
- @property
- def device(self) -> device:
- return self.tensor.device
- def __len__(self):
- return self.tensor.shape[0]
- def __getitem__(self, item) -> "ROIMasks":
- """
- Returns:
- ROIMasks: Create a new :class:`ROIMasks` by indexing.
- The following usages are allowed:
- 1. `new_masks = masks[2:10]`: return a slice of masks.
- 2. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
- with `length = len(masks)`. Nonzero elements in the vector will be selected.
- Note that the returned object might share storage with this object,
- subject to PyTorch's indexing semantics.
- """
- t = self.tensor[item]
- if t.dim() != 3:
- raise ValueError(
- f"Indexing on ROIMasks with {item} returns a tensor with shape {t.shape}!"
- )
- return ROIMasks(t)
- @torch.jit.unused
- def __repr__(self) -> str:
- s = self.__class__.__name__ + "("
- s += "num_instances={})".format(len(self.tensor))
- return s
- @torch.jit.unused
- def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
- """
- Args: see documentation of :func:`paste_masks_in_image`.
- """
- from detectron2.layers.mask_ops import (
- _paste_masks_tensor_shape,
- paste_masks_in_image,
- )
- if torch.jit.is_tracing():
- if isinstance(height, torch.Tensor):
- paste_func = _paste_masks_tensor_shape
- else:
- paste_func = paste_masks_in_image
- else:
- paste_func = retry_if_cuda_oom(paste_masks_in_image)
- bitmasks = paste_func(
- self.tensor, boxes.tensor, (height, width), threshold=threshold
- )
- return BitMasks(bitmasks)
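- # Illustrative usage (values made up; requires detectron2.layers.mask_ops): paste two
- # 28x28 soft ROI masks into a 64x64 image, thresholding at 0.5 to get a BitMasks.
- #   >>> roi_masks = ROIMasks(torch.rand(2, 28, 28))
- #   >>> boxes = Boxes(torch.tensor([[0.0, 0.0, 32.0, 32.0], [16.0, 16.0, 48.0, 48.0]]))
- #   >>> bm = roi_masks.to_bitmasks(boxes, 64, 64, threshold=0.5)
- #   >>> bm.tensor.shape
- #   torch.Size([2, 64, 64])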
|